google_speech 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -26,7 +26,29 @@ Or install it yourself as:
26
26
 
27
27
  ## Usage
28
28
 
29
- TODO: Write usage instructions here
29
+ require 'google_speech'
30
+
31
+ f = File.open '/Users/you/Downloads/audio.wav'
32
+ transcriber = GoogleSpeech::Transcriber.new(f)
33
+ t = transcriber.transcribe
34
+
35
+ Options:
36
+ * language - locale code of the language spoken in the audio (e.g. 'en-US')
37
+ * chunk_duration - length in seconds for each audio chunk of the wav to send
38
+ * overlap - extra seconds of audio added to the end of each chunk; chunking does not respect word boundaries, so overlap can compensate
39
+ * max_results - number of results to request from the speech API
40
+ * request_pause - seconds to sleep between chunk transcription requests
41
+ * profanity_filter - Google filters profanity by default; this gem turns the filter off unless this option is set to true
42
+
43
+ Default option values:
44
+ {
45
+ :language => 'en-US',
46
+ :chunk_duration => 8,
47
+ :overlap => 1,
48
+ :max_results => 2,
49
+ :request_pause => 1,
50
+ :profanity_filter => false
51
+ }
30
52
 
31
53
  ## Contributing
32
54
 
data/Rakefile CHANGED
@@ -1 +1,7 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ desc "Default Task (test gem)"
5
+ task :default => :test
6
+
7
+ Rake::TestTask.new(:test) { |t| t.test_files = FileList['test/*_test.rb'] }
data/bin/google_speech ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'google_speech'
4
+
5
+ if ARGV[0].nil? || !File.exist?(ARGV[0])
6
+ STDERR.puts "usage: #{$0} input.wav"
7
+ exit(1)
8
+ end
9
+
10
+ f = File.open ARGV[0]
11
+ transcriber = GoogleSpeech::Transcriber.new(f)
12
+ t = transcriber.transcribe
13
+ puts t.inspect
@@ -1,6 +1,8 @@
1
1
  # -*- encoding: utf-8 -*-
2
+
2
3
  lib = File.expand_path('../lib', __FILE__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+
4
6
  require 'google_speech/version'
5
7
 
6
8
  Gem::Specification.new do |gem|
data/lib/google_speech.rb CHANGED
@@ -1,203 +1,12 @@
1
- require 'rubygems'
2
- require 'google_speech/version'
1
+ # -*- encoding: utf-8 -*-
3
2
 
4
- require 'excon'
5
-
6
- require 'tempfile'
7
- require 'open3'
8
- require 'logger'
9
- require 'cgi'
10
- require 'json'
3
+ require 'rubygems'
11
4
 
12
5
  module GoogleSpeech
13
-
14
- SOX_ERROR_RE = /error:/
15
-
16
- class Transcriber
17
- attr_accessor :original_file, :options, :results
18
-
19
- def initialize(original_file, options=nil)
20
- @original_file = original_file
21
- @options = {:language=>'en-US', :chunk_duration=>8, :overlap=>1, :max_results=>2}.merge(options || {})
22
- @results = []
23
- end
24
-
25
- def transcribe
26
- ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap]).each{|chunk|
27
- result = chunk.to_hash
28
- transcript = transcribe_data(chunk.data)
29
- result[:text] = transcript['hypotheses'].first['utterance']
30
- result[:confidence] = transcript['hypotheses'].first['confidence']
31
- @results << result
32
- puts "\n#{result[:start_time]} - #{result[:start_time].to_i + result[:duration].to_i}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
33
- sleep(1)
34
- }
35
- @results
36
- end
37
-
38
- def transcribe_data(data)
39
- params = {
40
- :scheme => 'https',
41
- :host => 'www.google.com',
42
- :port => 443,
43
- :path => "/speech-api/v1/recognize",
44
- :query => "xjerr=1&client=google_speech&lang=#{options[:language]}&maxresults=#{options[:max_results].to_i}",
45
- :body => data,
46
- :method => 'POST',
47
- :headers => {
48
- 'Content-Type' => 'audio/x-flac; rate=16000',
49
- 'Content-Length' => data.bytesize,
50
- 'User-Agent' => "google_speech"
51
- }
52
- }
53
- retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
54
- retry_count = 0
55
- result = nil
56
- url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"
57
- while(!result && retry_count < retry_max)
58
- connection = Excon.new(url)
59
- response = connection.request(params)
60
- if response.status.to_s.start_with?('2')
61
- result = JSON.parse(response.body)
62
- else
63
- sleep(1)
64
- retry_count += 1
65
- end
66
- end
67
-
68
- result
69
- end
70
-
71
- end
72
-
73
- # break wav audio into short files
74
- class ChunkFactory
75
- attr_accessor :original_file, :chunk_duration, :overlap
76
-
77
- def initialize(original_file, chunk_duration=8, overlap=1)
78
- @chunk_duration = chunk_duration.to_i
79
- @original_file = original_file
80
- @overlap = overlap
81
- @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).to_i
82
- end
83
-
84
- # return temp file for each chunk
85
- def each
86
- pos = 0
87
- while(pos < @original_duration) do
88
- chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
89
- yield chunk
90
- pos = pos + [chunk.duration, @chunk_duration].min
91
- end
92
- end
93
- end
94
-
95
- class Chunk
96
- attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
97
-
98
- def initialize(original_file, original_duration, start_time, duration)
99
- @original_file = original_file
100
- @original_duration = original_duration.to_i
101
- @start_time = start_time.to_i
102
- @duration = [duration.to_i, (@original_duration - @start_time)].min
103
- @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
104
- # puts "@chunk_file: #{@chunk_file.path}"
105
- Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
106
- end
107
-
108
- def to_hash
109
- {
110
- :start_time => @start_time,
111
- :duration => @duration
112
- }
113
- end
114
-
115
- def data
116
- @data ||= @chunk_file.read
117
- end
118
- end
119
-
120
- # send each to google api
121
-
122
- class Utility
123
- class <<self
124
-
125
- def audio_file_duration(path)
126
- check_local_file(path)
127
-
128
- soxi_duration, err = run_command("soxi -V0 -D #{path}", :nice=>false, :echo_return=>false)
129
- duration = soxi_duration.chomp.to_f
130
- duration
131
- end
132
-
133
- def trim_to_flac(wav_path, duration, flac_path, start, length)
134
- check_local_file(wav_path)
135
-
136
- command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i} rate 16k"
137
- out, err = run_command(command)
138
- response = out + err
139
- response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
140
- end
141
-
142
- # Pass the command to run, and various options
143
- # :timeout - seconds to wait for command to complete, defaults to 2 hours
144
- # :echo_return - gets the return value via appended '; echo $?', true by default
145
- # :nice - call with nice -19 by default, set to false to stop, or integer to set different level
146
- def run_command(command, options={})
147
- timeout = options[:timeout] || 7200
148
-
149
- # default to adding a nice 19 if nothing specified
150
- nice = if options.key?(:nice)
151
- !options[:nice] ? '' : "nice -n #{options[:nice].to_i} "
152
- else
153
- 'nice -n 19 '
154
- end
155
-
156
- echo_return = (options.key?(:echo_return) && !options[:echo_return]) ? '' : '; echo $?'
157
-
158
- cmd = "#{nice}#{command}#{echo_return}"
159
-
160
- # logger.debug "run_command: #{cmd}"
161
- begin
162
- result = Timeout::timeout(timeout) {
163
- Open3::popen3(cmd) do |i,o,e|
164
- out_str = ""
165
- err_str = ""
166
- i.close # important!
167
- o.sync = true
168
- e.sync = true
169
- o.each{|line|
170
- out_str << line
171
- line.chomp!
172
- # logger.debug "stdout: #{line}"
173
- }
174
- e.each { |line|
175
- err_str << line
176
- line.chomp!
177
- # logger.debug "stderr: #{line}"
178
- }
179
- return out_str, err_str
180
- end
181
- }
182
- rescue Timeout::Error => toe
183
- # logger.debug "run_command:Timeout Error - running command, took longer than #{timeout} seconds to execute: '#{cmd}'"
184
- raise toe
185
- end
186
- end
187
-
188
- def check_local_file(file_path)
189
- raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
190
- end
191
-
192
- def logger
193
- @logger ||= Logger.new(STDOUT)
194
- end
195
-
196
- def logger=(l)
197
- @logger = l
198
- end
199
-
200
- end
201
- end
202
-
203
6
  end
7
+
8
+ require 'google_speech/version'
9
+ require 'google_speech/utility'
10
+ require 'google_speech/chunk'
11
+ require 'google_speech/chunk_factory'
12
+ require 'google_speech/transcriber'
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'tempfile'
4
+
5
+ module GoogleSpeech
6
+
7
+ class Chunk
8
+ attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
9
+
10
+ def initialize(original_file, original_duration, start_time, duration)
11
+ @original_file = original_file
12
+ @original_duration = original_duration.to_i
13
+ @start_time = start_time.to_i
14
+ @duration = [duration.to_i, (@original_duration - @start_time)].min
15
+ @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
16
+ # puts "@chunk_file: #{@chunk_file.path}"
17
+ Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
18
+ end
19
+
20
+ def to_hash
21
+ {
22
+ :start_time => @start_time,
23
+ :duration => @duration
24
+ }
25
+ end
26
+
27
+ def data
28
+ @data ||= @chunk_file.read
29
+ end
30
+ end
31
+
32
+ end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module GoogleSpeech
4
+
5
+ # break wav audio into short files
6
+ class ChunkFactory
7
+ attr_accessor :original_file, :chunk_duration, :overlap
8
+
9
+ def initialize(original_file, chunk_duration=8, overlap=1)
10
+ @chunk_duration = chunk_duration.to_i
11
+ @original_file = original_file
12
+ @overlap = overlap
13
+ @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).to_i
14
+ end
15
+
16
+ # yield a Chunk (backed by a temp flac file) for each segment of the audio
17
+ def each
18
+ pos = 0
19
+ while(pos < @original_duration) do
20
+ chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
21
+ yield chunk
22
+ pos = pos + [chunk.duration, @chunk_duration].min
23
+ end
24
+ end
25
+ end
26
+
27
+ end
@@ -0,0 +1,81 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'excon'
4
+ require 'json'
5
+
6
+ module GoogleSpeech
7
+
8
+ class Transcriber
9
+ attr_accessor :original_file, :options, :results
10
+
11
+ DEFAULT_OPTIONS = {
12
+ :language => 'en-US',
13
+ :chunk_duration => 8,
14
+ :overlap => 1,
15
+ :max_results => 2,
16
+ :request_pause => 1,
17
+ :profanity_filter => false
18
+ }
19
+
20
+ def initialize(original_file, options=nil)
21
+ @original_file = original_file
22
+ @options = DEFAULT_OPTIONS.merge(options || {})
23
+ @results = []
24
+ end
25
+
26
+ def transcribe
27
+ chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
28
+ chunk_factory.each{ |chunk|
29
+ result = chunk.to_hash
30
+ transcript = transcribe_data(chunk.data)
31
+ result[:text] = transcript['hypotheses'].first['utterance']
32
+ result[:confidence] = transcript['hypotheses'].first['confidence']
33
+ @results << result
34
+
35
+ # puts "\n#{result[:start_time]} - #{result[:start_time].to_i + result[:duration].to_i}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
36
+
37
+ sleep(options[:request_pause].to_i)
38
+ }
39
+ @results
40
+ end
41
+
42
+ def pfilter
43
+ options[:profanity_filter] ? '1' : '0'
44
+ end
45
+
46
+ def transcribe_data(data)
47
+ params = {
48
+ :scheme => 'https',
49
+ :host => 'www.google.com',
50
+ :port => 443,
51
+ :path => "/speech-api/v1/recognize",
52
+ :query => "xjerr=1&client=google_speech&lang=#{options[:language]}&maxresults=#{options[:max_results].to_i}&pfilter=#{pfilter}",
53
+ :body => data,
54
+ :method => 'POST',
55
+ :headers => {
56
+ 'Content-Type' => 'audio/x-flac; rate=16000',
57
+ 'Content-Length' => data.bytesize,
58
+ 'User-Agent' => "google_speech"
59
+ }
60
+ }
61
+ retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
62
+ retry_count = 0
63
+ result = nil
64
+ url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"
65
+ while(!result && retry_count < retry_max)
66
+ connection = Excon.new(url)
67
+ response = connection.request(params)
68
+ if response.status.to_s.start_with?('2')
69
+ result = JSON.parse(response.body)
70
+ else
71
+ sleep(1)
72
+ retry_count += 1
73
+ end
74
+ end
75
+
76
+ result
77
+ end
78
+
79
+ end
80
+
81
+ end
@@ -0,0 +1,93 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'tempfile'
4
+ require 'open3'
5
+ require 'logger'
6
+
7
+ module GoogleSpeech
8
+
9
+ class Utility
10
+
11
+ SOX_ERROR_RE = /error:/
12
+
13
+ class <<self
14
+
15
+ def audio_file_duration(path)
16
+ check_local_file(path)
17
+
18
+ soxi_duration, err = run_command("soxi -V0 -D #{path}", :nice=>false, :echo_return=>false)
19
+ duration = soxi_duration.chomp.to_f
20
+ duration
21
+ end
22
+
23
+ def trim_to_flac(wav_path, duration, flac_path, start, length)
24
+ check_local_file(wav_path)
25
+
26
+ command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i} rate 16k"
27
+ out, err = run_command(command)
28
+ response = out + err
29
+ response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
30
+ end
31
+
32
+ # Pass the command to run, and various options
33
+ # :timeout - seconds to wait for command to complete, defaults to 2 hours
34
+ # :echo_return - gets the return value via appended '; echo $?', true by default
35
+ # :nice - call with 'nice -n 19' (lowest priority) by default, set to false to skip nice, or an integer to set a different level
36
+ def run_command(command, options={})
37
+ timeout = options[:timeout] || 7200
38
+
39
+ # default to adding a nice 19 if nothing specified
40
+ nice = if options.key?(:nice)
41
+ !options[:nice] ? '' : "nice -n #{options[:nice].to_i} "
42
+ else
43
+ 'nice -n 19 '
44
+ end
45
+
46
+ echo_return = (options.key?(:echo_return) && !options[:echo_return]) ? '' : '; echo $?'
47
+
48
+ cmd = "#{nice}#{command}#{echo_return}"
49
+
50
+ # logger.debug "run_command: #{cmd}"
51
+ begin
52
+ result = Timeout::timeout(timeout) {
53
+ Open3::popen3(cmd) do |i,o,e|
54
+ out_str = ""
55
+ err_str = ""
56
+ i.close # important!
57
+ o.sync = true
58
+ e.sync = true
59
+ o.each{|line|
60
+ out_str << line
61
+ line.chomp!
62
+ # logger.debug "stdout: #{line}"
63
+ }
64
+ e.each { |line|
65
+ err_str << line
66
+ line.chomp!
67
+ # logger.debug "stderr: #{line}"
68
+ }
69
+ return out_str, err_str
70
+ end
71
+ }
72
+ rescue Timeout::Error => toe
73
+ # logger.debug "run_command:Timeout Error - running command, took longer than #{timeout} seconds to execute: '#{cmd}'"
74
+ raise toe
75
+ end
76
+ end
77
+
78
+ def check_local_file(file_path)
79
+ raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
80
+ end
81
+
82
+ def logger
83
+ @logger ||= Logger.new(STDOUT)
84
+ end
85
+
86
+ def logger=(l)
87
+ @logger = l
88
+ end
89
+
90
+ end
91
+ end
92
+
93
+ end
@@ -1,3 +1,5 @@
1
+ # -*- encoding: utf-8 -*-
2
+
1
3
  module GoogleSpeech
2
- VERSION = "0.0.1"
4
+ VERSION = "0.0.2"
3
5
  end
@@ -0,0 +1,5 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'minitest/autorun'
4
+ $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
5
+ require 'google_speech'
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test_helper.rb'
4
+
5
+ describe Transcriber do
6
+ before do
7
+ end
8
+
9
+ describe 'load file' do
10
+ f = File.open '/Users/andrew/Downloads/hive.wav'
11
+ # f = File.open('/Users/andrew/dev/projects/nu_wav/test/files/test_basic.wav')
12
+ transcriber = GoogleSpeech::Transcriber.new(f)
13
+ t = transcriber.transcribe
14
+ puts t.inspect
15
+ end
16
+
17
+ # describe "when asked about cheeseburgers" do
18
+ # it "must respond positively" do
19
+ # @meme.i_can_has_cheezburger?.must_equal "OHAI!"
20
+ # end
21
+ # end
22
+
23
+ # describe "when asked about blending possibilities" do
24
+ # it "won't say no" do
25
+ # @meme.will_it_blend?.wont_match /^no/i
26
+ # end
27
+ # end
28
+
29
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -46,7 +46,8 @@ dependencies:
46
46
  description: This is a gem to call the google speech api.
47
47
  email:
48
48
  - andrew@prx.org
49
- executables: []
49
+ executables:
50
+ - google_speech
50
51
  extensions: []
51
52
  extra_rdoc_files: []
52
53
  files:
@@ -55,9 +56,16 @@ files:
55
56
  - LICENSE.txt
56
57
  - README.md
57
58
  - Rakefile
59
+ - bin/google_speech
58
60
  - google_speech.gemspec
59
61
  - lib/google_speech.rb
62
+ - lib/google_speech/chunk.rb
63
+ - lib/google_speech/chunk_factory.rb
64
+ - lib/google_speech/transcriber.rb
65
+ - lib/google_speech/utility.rb
60
66
  - lib/google_speech/version.rb
67
+ - test/test_helper.rb
68
+ - test/transcriber_test.rb
61
69
  homepage: ''
62
70
  licenses: []
63
71
  post_install_message:
@@ -70,17 +78,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
70
78
  - - ! '>='
71
79
  - !ruby/object:Gem::Version
72
80
  version: '0'
81
+ segments:
82
+ - 0
83
+ hash: -2361961218730724122
73
84
  required_rubygems_version: !ruby/object:Gem::Requirement
74
85
  none: false
75
86
  requirements:
76
87
  - - ! '>='
77
88
  - !ruby/object:Gem::Version
78
89
  version: '0'
90
+ segments:
91
+ - 0
92
+ hash: -2361961218730724122
79
93
  requirements: []
80
94
  rubyforge_project:
81
95
  rubygems_version: 1.8.23
82
96
  signing_key:
83
97
  specification_version: 3
84
98
  summary: This is a gem to call the google speech api.
85
- test_files: []
86
- has_rdoc:
99
+ test_files:
100
+ - test/test_helper.rb
101
+ - test/transcriber_test.rb