google_speech 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -26,7 +26,29 @@ Or install it yourself as:
26
26
 
27
27
  ## Usage
28
28
 
29
- TODO: Write usage instructions here
29
+ require 'google_speech'
30
+
31
+ f = File.open '/Users/you/Downloads/audio.wav'
32
+ transcriber = GoogleSpeech::Transcriber.new(f)
33
+ t = transcriber.transcribe
34
+
35
+ Options:
36
+ * language - language code of the speech being transcribed (e.g. en-US)
37
+ * chunk_duration - length in seconds for each audio chunk of the wav to send
38
+ * overlap - extra seconds of audio added to the end of each chunk; chunking does not respect word boundaries, and the overlap can compensate
39
+ * max_results - number of results to request from the speech API
40
+ * request_pause - sleep seconds between chunk transcription requests
41
+ * profanity_filter - Google filters profanity by default; this gem turns that filter off unless this option is set to true
42
+
43
+ Default option values:
44
+ {
45
+ :language => 'en-US',
46
+ :chunk_duration => 8,
47
+ :overlap => 1,
48
+ :max_results => 2,
49
+ :request_pause => 1,
50
+ :profanity_filter => false
51
+ }
30
52
 
31
53
  ## Contributing
32
54
 
data/Rakefile CHANGED
@@ -1 +1,7 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Running `rake` with no arguments runs the test suite.
desc "Default Task (test gem)"
task :default => :test

# Defines the :test task over every file matching test/*_test.rb.
Rake::TestTask.new(:test) do |test_task|
  test_task.test_files = FileList['test/*_test.rb']
end
data/bin/google_speech ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'google_speech'
4
+
5
# Command-line entry point: transcribe the wav file named by the first
# argument and print the resulting array of chunk hashes.
input_path = ARGV[0]

# File.file? (rather than File.exist?) also rejects directories, which would
# otherwise pass this check even though they cannot be transcribed.
if input_path.nil? || !File.file?(input_path)
  STDERR.puts "usage: #{$0} input.wav"
  exit(1)
end

# The block form closes the audio file once transcription has finished.
File.open(input_path) do |f|
  transcriber = GoogleSpeech::Transcriber.new(f)
  t = transcriber.transcribe
  puts t.inspect
end
@@ -1,6 +1,8 @@
1
1
  # -*- encoding: utf-8 -*-
2
+
2
3
  lib = File.expand_path('../lib', __FILE__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+
4
6
  require 'google_speech/version'
5
7
 
6
8
  Gem::Specification.new do |gem|
data/lib/google_speech.rb CHANGED
@@ -1,203 +1,12 @@
1
- require 'rubygems'
2
- require 'google_speech/version'
1
+ # -*- encoding: utf-8 -*-
3
2
 
4
- require 'excon'
5
-
6
- require 'tempfile'
7
- require 'open3'
8
- require 'logger'
9
- require 'cgi'
10
- require 'json'
3
+ require 'rubygems'
11
4
 
12
5
  module GoogleSpeech
13
-
14
- SOX_ERROR_RE = /error:/
15
-
16
- class Transcriber
17
- attr_accessor :original_file, :options, :results
18
-
19
- def initialize(original_file, options=nil)
20
- @original_file = original_file
21
- @options = {:language=>'en-US', :chunk_duration=>8, :overlap=>1, :max_results=>2}.merge(options || {})
22
- @results = []
23
- end
24
-
25
- def transcribe
26
- ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap]).each{|chunk|
27
- result = chunk.to_hash
28
- transcript = transcribe_data(chunk.data)
29
- result[:text] = transcript['hypotheses'].first['utterance']
30
- result[:confidence] = transcript['hypotheses'].first['confidence']
31
- @results << result
32
- puts "\n#{result[:start_time]} - #{result[:start_time].to_i + result[:duration].to_i}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
33
- sleep(1)
34
- }
35
- @results
36
- end
37
-
38
- def transcribe_data(data)
39
- params = {
40
- :scheme => 'https',
41
- :host => 'www.google.com',
42
- :port => 443,
43
- :path => "/speech-api/v1/recognize",
44
- :query => "xjerr=1&client=google_speech&lang=#{options[:language]}&maxresults=#{options[:max_results].to_i}",
45
- :body => data,
46
- :method => 'POST',
47
- :headers => {
48
- 'Content-Type' => 'audio/x-flac; rate=16000',
49
- 'Content-Length' => data.bytesize,
50
- 'User-Agent' => "google_speech"
51
- }
52
- }
53
- retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
54
- retry_count = 0
55
- result = nil
56
- url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"
57
- while(!result && retry_count < retry_max)
58
- connection = Excon.new(url)
59
- response = connection.request(params)
60
- if response.status.to_s.start_with?('2')
61
- result = JSON.parse(response.body)
62
- else
63
- sleep(1)
64
- retry_count += 1
65
- end
66
- end
67
-
68
- result
69
- end
70
-
71
- end
72
-
73
- # break wav audio into short files
74
- class ChunkFactory
75
- attr_accessor :original_file, :chunk_duration, :overlap
76
-
77
- def initialize(original_file, chunk_duration=8, overlap=1)
78
- @chunk_duration = chunk_duration.to_i
79
- @original_file = original_file
80
- @overlap = overlap
81
- @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).to_i
82
- end
83
-
84
- # return temp file for each chunk
85
- def each
86
- pos = 0
87
- while(pos < @original_duration) do
88
- chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
89
- yield chunk
90
- pos = pos + [chunk.duration, @chunk_duration].min
91
- end
92
- end
93
- end
94
-
95
- class Chunk
96
- attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
97
-
98
- def initialize(original_file, original_duration, start_time, duration)
99
- @original_file = original_file
100
- @original_duration = original_duration.to_i
101
- @start_time = start_time.to_i
102
- @duration = [duration.to_i, (@original_duration - @start_time)].min
103
- @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
104
- # puts "@chunk_file: #{@chunk_file.path}"
105
- Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
106
- end
107
-
108
- def to_hash
109
- {
110
- :start_time => @start_time,
111
- :duration => @duration
112
- }
113
- end
114
-
115
- def data
116
- @data ||= @chunk_file.read
117
- end
118
- end
119
-
120
- # send each to google api
121
-
122
- class Utility
123
- class <<self
124
-
125
- def audio_file_duration(path)
126
- check_local_file(path)
127
-
128
- soxi_duration, err = run_command("soxi -V0 -D #{path}", :nice=>false, :echo_return=>false)
129
- duration = soxi_duration.chomp.to_f
130
- duration
131
- end
132
-
133
- def trim_to_flac(wav_path, duration, flac_path, start, length)
134
- check_local_file(wav_path)
135
-
136
- command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i} rate 16k"
137
- out, err = run_command(command)
138
- response = out + err
139
- response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
140
- end
141
-
142
- # Pass the command to run, and various options
143
- # :timeout - seconds to wait for command to complete, defaults to 2 hours
144
- # :echo_return - gets the return value via appended '; echo $?', true by default
145
- # :nice - call with nice -19 by default, set to false to stop, or integer to set different level
146
- def run_command(command, options={})
147
- timeout = options[:timeout] || 7200
148
-
149
- # default to adding a nice 19 if nothing specified
150
- nice = if options.key?(:nice)
151
- !options[:nice] ? '' : "nice -n #{options[:nice].to_i} "
152
- else
153
- 'nice -n 19 '
154
- end
155
-
156
- echo_return = (options.key?(:echo_return) && !options[:echo_return]) ? '' : '; echo $?'
157
-
158
- cmd = "#{nice}#{command}#{echo_return}"
159
-
160
- # logger.debug "run_command: #{cmd}"
161
- begin
162
- result = Timeout::timeout(timeout) {
163
- Open3::popen3(cmd) do |i,o,e|
164
- out_str = ""
165
- err_str = ""
166
- i.close # important!
167
- o.sync = true
168
- e.sync = true
169
- o.each{|line|
170
- out_str << line
171
- line.chomp!
172
- # logger.debug "stdout: #{line}"
173
- }
174
- e.each { |line|
175
- err_str << line
176
- line.chomp!
177
- # logger.debug "stderr: #{line}"
178
- }
179
- return out_str, err_str
180
- end
181
- }
182
- rescue Timeout::Error => toe
183
- # logger.debug "run_command:Timeout Error - running command, took longer than #{timeout} seconds to execute: '#{cmd}'"
184
- raise toe
185
- end
186
- end
187
-
188
- def check_local_file(file_path)
189
- raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
190
- end
191
-
192
- def logger
193
- @logger ||= Logger.new(STDOUT)
194
- end
195
-
196
- def logger=(l)
197
- @logger = l
198
- end
199
-
200
- end
201
- end
202
-
203
6
  end
7
+
8
+ require 'google_speech/version'
9
+ require 'google_speech/utility'
10
+ require 'google_speech/chunk'
11
+ require 'google_speech/chunk_factory'
12
+ require 'google_speech/transcriber'
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'tempfile'
4
+
5
module GoogleSpeech

  # One slice of the original wav file, converted to FLAC in a temp file.
  #
  # The conversion runs as soon as the object is created; #data returns the
  # encoded bytes and #to_hash describes where the slice sits in the original.
  class Chunk
    attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file

    # original_file     - object responding to #path (e.g. an open File) for
    #                     the source wav
    # original_duration - length of the source audio in seconds (to_i applied)
    # start_time        - offset in seconds where the chunk begins (to_i applied)
    # duration          - requested chunk length in seconds; clamped so the
    #                     chunk never extends past original_duration
    #
    # Propagates whatever Utility.trim_to_flac raises.
    def initialize(original_file, original_duration, start_time, duration)
      @original_file     = original_file
      @original_duration = original_duration.to_i
      @start_time        = start_time.to_i
      @duration          = [duration.to_i, (@original_duration - @start_time)].min
      @chunk_file        = Tempfile.new([File.basename(@original_file.path), '.flac'])
      # FLAC is binary data: without binmode the bytes are read back in text
      # mode (default external encoding, and newline translation on platforms
      # that perform it), which can corrupt the audio sent to the API.
      @chunk_file.binmode
      Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
    end

    # Position of this chunk within the original audio, in whole seconds.
    def to_hash
      {
        :start_time => @start_time,
        :duration   => @duration
      }
    end

    # The FLAC bytes of this chunk as a binary string (read once, then cached).
    def data
      @data ||= begin
        # Always read from the start, regardless of the handle's position.
        @chunk_file.rewind
        @chunk_file.read
      end
    end
  end

end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
module GoogleSpeech

  # Breaks wav audio into short, consecutive chunks.
  #
  # Each chunk is requested with chunk_duration + overlap seconds of audio,
  # while the start position advances by at most chunk_duration, so successive
  # chunks share up to `overlap` seconds.
  class ChunkFactory
    attr_accessor :original_file, :chunk_duration, :overlap

    # original_file  - object responding to #path for the source wav
    # chunk_duration - seconds between chunk start positions (to_i applied)
    # overlap        - extra seconds added to each chunk (to_i applied, so nil
    #                  and numeric strings are accepted)
    #
    # Raises ArgumentError when the durations could never advance through the
    # file; previously such values made #each loop forever.
    # The file's duration is truncated to whole seconds.
    def initialize(original_file, chunk_duration=8, overlap=1)
      @chunk_duration = chunk_duration.to_i
      @original_file  = original_file
      @overlap        = overlap.to_i
      unless @chunk_duration > 0 && (@chunk_duration + @overlap) > 0
        raise ArgumentError, "chunk_duration must be positive and exceed any negative overlap " \
                             "(chunk_duration=#{chunk_duration.inspect}, overlap=#{overlap.inspect})"
      end
      @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).to_i
    end

    # Yields a Chunk for each slice of the file, in order.
    # Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?

      pos = 0
      while pos < @original_duration
        chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
        yield chunk
        step = [chunk.duration, @chunk_duration].min
        # The accessors allow the durations to be changed after construction;
        # refuse to spin in place if that left nothing to advance by.
        raise ArgumentError, "chunking cannot advance (step=#{step})" unless step > 0
        pos += step
      end
    end
  end

end
@@ -0,0 +1,81 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'excon'
4
+ require 'json'
5
+
6
module GoogleSpeech

  # Transcribes an audio file by splitting it into chunks and posting each
  # chunk to the Google speech API.
  class Transcriber
    attr_accessor :original_file, :options, :results

    DEFAULT_OPTIONS = {
      :language         => 'en-US',
      :chunk_duration   => 8,
      :overlap          => 1,
      :max_results      => 2,
      :request_pause    => 1,
      :profanity_filter => false
    }

    # original_file - the wav file to transcribe; handed to ChunkFactory
    # options       - Hash overriding DEFAULT_OPTIONS; :retry_max (default 3,
    #                 minimum 1) is also honoured by #transcribe_data
    def initialize(original_file, options=nil)
      @original_file = original_file
      @options       = DEFAULT_OPTIONS.merge(options || {})
      @results       = []
    end

    # Transcribes every chunk in order and returns the accumulated results: one
    # Hash per chunk with :start_time, :duration, :text and :confidence.
    #
    # :text and :confidence are nil for a chunk when every request failed or
    # the API returned no hypotheses (previously this raised NoMethodError and
    # discarded the chunks already transcribed).
    def transcribe
      chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
      chunk_factory.each do |chunk|
        result = chunk.to_hash
        best = best_hypothesis(transcribe_data(chunk.data))
        result[:text]       = best['utterance']
        result[:confidence] = best['confidence']
        @results << result

        # to_f keeps fractional pauses such as 0.5 (to_i truncated them to 0).
        pause = options[:request_pause].to_f
        sleep(pause) if pause > 0
      end
      @results
    end

    # Value of the API's pfilter query parameter: '1' when profanity filtering
    # is requested, '0' otherwise.
    def pfilter
      options[:profanity_filter] ? '1' : '0'
    end

    # Posts one chunk of FLAC data to the speech API.
    #
    # Makes up to :retry_max attempts, sleeping one second after each non-2xx
    # response. Returns the parsed JSON body of the first 2xx response, or nil
    # when every attempt failed.
    def transcribe_data(data)
      params = {
        :scheme => 'https',
        :host   => 'www.google.com',
        :port   => 443,
        :path   => "/speech-api/v1/recognize",
        :query  => "xjerr=1&client=google_speech&lang=#{options[:language]}&maxresults=#{options[:max_results].to_i}&pfilter=#{pfilter}",
        :body   => data,
        :method => 'POST',
        :headers => {
          'Content-Type'   => 'audio/x-flac; rate=16000',
          'Content-Length' => data.bytesize,
          'User-Agent'     => "google_speech"
        }
      }
      retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
      url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"
      connection = Excon.new(url)

      # A counted loop keeps the number of requests bounded even when a 2xx
      # body parses to null/false.
      retry_max.times do
        response = connection.request(params)
        return JSON.parse(response.body) if response.status.to_s.start_with?('2')
        sleep(1)
      end

      nil
    end

    private

    # First hypothesis of a parsed API response, or an empty Hash when the
    # response is missing or carries no hypotheses.
    def best_hypothesis(transcript)
      hypotheses = transcript.is_a?(Hash) ? transcript['hypotheses'] : nil
      first = hypotheses.is_a?(Array) ? hypotheses.first : nil
      first.is_a?(Hash) ? first : {}
    end

  end

end
@@ -0,0 +1,93 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
require 'tempfile'
require 'open3'
require 'logger'
require 'shellwords'
require 'timeout'

module GoogleSpeech

  # Helpers that shell out to the sox / soxi command-line tools.
  class Utility

    # A line containing this pattern in sox output is treated as a failure.
    SOX_ERROR_RE = /error:/

    class << self

      # Duration of the audio file at `path` in seconds, as a Float, taken
      # from the output of `soxi -D`.
      #
      # Raises RuntimeError when the file is missing or empty.
      def audio_file_duration(path)
        check_local_file(path)

        # The path is shell-escaped so names with spaces, quotes or shell
        # metacharacters reach soxi as a single argument.
        soxi_duration, _err = run_command("soxi -V0 -D #{Shellwords.escape(path.to_s)}", :nice => false, :echo_return => false)
        soxi_duration.chomp.to_f
      end

      # Writes `length` seconds of wav_path, starting `start` seconds in, to
      # flac_path as 16 kHz FLAC. start and length are truncated to integers.
      #
      # `duration` is not used; it is kept so existing callers keep working.
      #
      # Raises RuntimeError when the source file is missing or empty, or when
      # any line of the sox output matches SOX_ERROR_RE.
      def trim_to_flac(wav_path, duration, flac_path, start, length)
        check_local_file(wav_path)

        command = "sox -t wav #{Shellwords.escape(wav_path.to_s)} -t flac #{Shellwords.escape(flac_path.to_s)} trim #{start.to_i} #{length.to_i} rate 16k"
        out, err = run_command(command)
        response = out + err
        response.split("\n").each { |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
      end

      # Runs `command` and returns [stdout, stderr] as two strings.
      #
      # Options:
      # :timeout     - seconds to wait for the command to complete, defaults to
      #                7200 (2 hours); Timeout::Error is raised when exceeded
      # :echo_return - appends '; echo $?' so the exit status is printed as the
      #                last line of stdout; on unless explicitly set to false
      # :nice        - the command is prefixed with `nice -n 19` by default; set
      #                to false to disable, or to an integer for another level
      def run_command(command, options={})
        timeout = options[:timeout] || 7200

        # default to adding a nice 19 if nothing specified
        nice = if options.key?(:nice)
          options[:nice] ? "nice -n #{options[:nice].to_i} " : ''
        else
          'nice -n 19 '
        end

        echo_return = (options.key?(:echo_return) && !options[:echo_return]) ? '' : '; echo $?'

        cmd = "#{nice}#{command}#{echo_return}"

        # capture3 drains stdout and stderr concurrently. Reading stdout to EOF
        # before touching stderr can stall when the child fills the stderr pipe.
        Timeout.timeout(timeout) do
          out_str, err_str, _status = Open3.capture3(cmd)
          [out_str, err_str]
        end
      end

      # Raises RuntimeError unless file_path names an existing, non-empty file.
      def check_local_file(file_path)
        raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
      end

      # Logger shared by the class; writes to STDOUT unless replaced.
      def logger
        @logger ||= Logger.new(STDOUT)
      end

      def logger=(l)
        @logger = l
      end

    end
  end

end
@@ -1,3 +1,5 @@
1
+ # -*- encoding: utf-8 -*-
2
+
1
3
  module GoogleSpeech
2
- VERSION = "0.0.1"
4
+ VERSION = "0.0.2"
3
5
  end
@@ -0,0 +1,5 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'minitest/autorun'
4
+ $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
5
+ require 'google_speech'
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test_helper.rb'
4
+
5
# End-to-end check of GoogleSpeech::Transcriber against a real recording.
#
# The recording is not shipped with the gem: point GOOGLE_SPEECH_TEST_WAV at
# a wav file to run the test; it is skipped when no file is found.
describe GoogleSpeech::Transcriber do
  before do
    @wav_path = ENV['GOOGLE_SPEECH_TEST_WAV'] || '/Users/andrew/Downloads/hive.wav'
  end

  describe 'load file' do
    it 'transcribes a wav file into a list of chunk results' do
      skip "no test audio at #{@wav_path}" unless File.file?(@wav_path)

      # Block form so the audio file is closed even if transcription raises.
      t = File.open(@wav_path) do |f|
        GoogleSpeech::Transcriber.new(f).transcribe
      end

      assert_kind_of Array, t
      assert t.all? { |result| result.key?(:start_time) && result.key?(:duration) },
             "every result should carry :start_time and :duration"
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -46,7 +46,8 @@ dependencies:
46
46
  description: This is a gem to call the google speech api.
47
47
  email:
48
48
  - andrew@prx.org
49
- executables: []
49
+ executables:
50
+ - google_speech
50
51
  extensions: []
51
52
  extra_rdoc_files: []
52
53
  files:
@@ -55,9 +56,16 @@ files:
55
56
  - LICENSE.txt
56
57
  - README.md
57
58
  - Rakefile
59
+ - bin/google_speech
58
60
  - google_speech.gemspec
59
61
  - lib/google_speech.rb
62
+ - lib/google_speech/chunk.rb
63
+ - lib/google_speech/chunk_factory.rb
64
+ - lib/google_speech/transcriber.rb
65
+ - lib/google_speech/utility.rb
60
66
  - lib/google_speech/version.rb
67
+ - test/test_helper.rb
68
+ - test/transcriber_test.rb
61
69
  homepage: ''
62
70
  licenses: []
63
71
  post_install_message:
@@ -70,17 +78,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
70
78
  - - ! '>='
71
79
  - !ruby/object:Gem::Version
72
80
  version: '0'
81
+ segments:
82
+ - 0
83
+ hash: -2361961218730724122
73
84
  required_rubygems_version: !ruby/object:Gem::Requirement
74
85
  none: false
75
86
  requirements:
76
87
  - - ! '>='
77
88
  - !ruby/object:Gem::Version
78
89
  version: '0'
90
+ segments:
91
+ - 0
92
+ hash: -2361961218730724122
79
93
  requirements: []
80
94
  rubyforge_project:
81
95
  rubygems_version: 1.8.23
82
96
  signing_key:
83
97
  specification_version: 3
84
98
  summary: This is a gem to call the google speech api.
85
- test_files: []
86
- has_rdoc:
99
+ test_files:
100
+ - test/test_helper.rb
101
+ - test/transcriber_test.rb