google_speech 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google_speech/chunk.rb +8 -7
- data/lib/google_speech/chunk_factory.rb +4 -3
- data/lib/google_speech/transcriber.rb +51 -17
- data/lib/google_speech/utility.rb +4 -7
- data/lib/google_speech/version.rb +1 -1
- data/spec/transcriber_spec.rb +1 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 308bea0bbf8297bbe42f197906b271e789453629
|
4
|
+
data.tar.gz: 4829db0fc7ceecf1c91de0b044f108bf3016eea3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b536794d1d6ee96227f2a21a3928a11d83583ef982597b872520422510a873bc1cbd431cffe8f4f52a6428a74d54900aa39a2ffffc0d8b054659c5a15704792
|
7
|
+
data.tar.gz: 28d88d63f1a08eccd9c36010a7cc0bfe89292487029c96528648529428b02210c9ebe09d26700e436b6157a67f733c35555d1fe10c1776336b928c3b093597c2
|
data/lib/google_speech/chunk.rb
CHANGED
@@ -5,16 +5,17 @@ require 'tempfile'
|
|
5
5
|
module GoogleSpeech
|
6
6
|
|
7
7
|
class Chunk
|
8
|
-
attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
|
8
|
+
attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file, :rate
|
9
9
|
|
10
|
-
def initialize(original_file, original_duration, start_time, duration)
|
11
|
-
@original_file
|
10
|
+
def initialize(original_file, original_duration, start_time, duration, rate)
|
11
|
+
@original_file = original_file
|
12
12
|
@original_duration = original_duration
|
13
|
-
@start_time
|
14
|
-
@duration
|
15
|
-
@
|
13
|
+
@start_time = start_time
|
14
|
+
@duration = [duration, (@original_duration - @start_time)].min
|
15
|
+
@rate = rate
|
16
|
+
@chunk_file = Tempfile.new([File.basename(@original_file), '.wav'])
|
16
17
|
# puts "@chunk_file: #{@chunk_file.path}"
|
17
|
-
Utility.
|
18
|
+
Utility.trim_and_encode(@original_file.path, @chunk_file.path, @start_time, @duration, @rate)
|
18
19
|
end
|
19
20
|
|
20
21
|
def to_hash
|
@@ -4,12 +4,13 @@ module GoogleSpeech
|
|
4
4
|
|
5
5
|
# break wav audio into short files
|
6
6
|
class ChunkFactory
|
7
|
-
attr_accessor :original_file, :chunk_duration, :overlap
|
7
|
+
attr_accessor :original_file, :chunk_duration, :overlap, :rate
|
8
8
|
|
9
|
-
def initialize(original_file, chunk_duration
|
9
|
+
def initialize(original_file, chunk_duration, overlap, rate)
|
10
10
|
@chunk_duration = chunk_duration
|
11
11
|
@original_file = original_file
|
12
12
|
@overlap = overlap
|
13
|
+
@rate = rate
|
13
14
|
@original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path)
|
14
15
|
end
|
15
16
|
|
@@ -17,7 +18,7 @@ module GoogleSpeech
|
|
17
18
|
def each
|
18
19
|
pos = 0
|
19
20
|
while(pos < @original_duration) do
|
20
|
-
chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
|
21
|
+
chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap), @rate)
|
21
22
|
yield chunk
|
22
23
|
pos = pos + [chunk.duration, @chunk_duration].min
|
23
24
|
end
|
@@ -10,22 +10,27 @@ module GoogleSpeech
|
|
10
10
|
|
11
11
|
DEFAULT_OPTIONS = {
|
12
12
|
:key => 'AIzaSyCnl6MRydhw_5fLXIdASxkLJzcJh5iX0M4',
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
13
|
+
:client => SecureRandom.hex,
|
14
|
+
:audio_type => 'audio/l16',
|
15
|
+
:rate => 8000,
|
16
|
+
:language => 'en-us',
|
17
|
+
:chunk_duration => 4.0,
|
18
|
+
:overlap => 0.25,
|
16
19
|
:max_results => 1,
|
17
|
-
:request_pause => 1,
|
18
|
-
:profanity_filter => true
|
20
|
+
:request_pause => 0.1,
|
21
|
+
:profanity_filter => true,
|
22
|
+
:retry_max => 3
|
19
23
|
}
|
20
24
|
|
21
25
|
def initialize(original_file, options=nil)
|
22
26
|
@original_file = original_file
|
23
27
|
@options = DEFAULT_OPTIONS.merge(options || {})
|
24
28
|
@results = []
|
29
|
+
@last_ua = 0
|
25
30
|
end
|
26
31
|
|
27
32
|
def transcribe
|
28
|
-
chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
|
33
|
+
chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap], options[:rate])
|
29
34
|
chunk_factory.each{ |chunk|
|
30
35
|
result = chunk.to_hash
|
31
36
|
transcript = transcribe_data(chunk.data)
|
@@ -63,43 +68,72 @@ module GoogleSpeech
|
|
63
68
|
options[:profanity_filter] ? '1' : '0'
|
64
69
|
end
|
65
70
|
|
71
|
+
def user_agent
|
72
|
+
ua_strings = [
|
73
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
|
74
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
|
75
|
+
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
|
76
|
+
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
|
77
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36',
|
78
|
+
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
|
79
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36',
|
80
|
+
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
|
81
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
|
82
|
+
'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
|
83
|
+
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36',
|
84
|
+
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
|
85
|
+
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
|
86
|
+
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
|
87
|
+
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
88
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
89
|
+
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
90
|
+
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
91
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
92
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'
|
93
|
+
]
|
94
|
+
ua = ua_strings[(@last_ua % ua_strings.length)]
|
95
|
+
@last_ua += 1
|
96
|
+
ua
|
97
|
+
end
|
98
|
+
|
66
99
|
def transcribe_data(data)
|
67
100
|
params = {
|
68
101
|
:path => "/speech-api/v2/recognize",
|
69
|
-
:query => "output=json&client
|
102
|
+
:query => "output=json&key=#{options[:key]}&client=#{options[:client]}&lang=#{options[:language]}",
|
70
103
|
:body => data,
|
71
104
|
:method => 'POST',
|
72
105
|
:headers => {
|
73
|
-
'Content-Type' =>
|
106
|
+
'Content-Type' => "#{options[:audio_type]}; rate=#{options[:rate]}",
|
74
107
|
'Content-Length' => data.bytesize,
|
75
|
-
'User-Agent' =>
|
108
|
+
'User-Agent' => user_agent
|
76
109
|
}
|
77
110
|
}
|
111
|
+
# puts "data size: #{data.bytesize}"
|
78
112
|
retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
|
79
113
|
retry_count = 0
|
80
114
|
result = nil
|
81
|
-
url = "https://www.google.com
|
115
|
+
url = "https://www.google.com#{params[:path]}"
|
82
116
|
while(!result && retry_count < retry_max)
|
83
117
|
retry_count += 1
|
84
|
-
|
85
118
|
begin
|
86
119
|
connection = Excon.new(url)
|
87
120
|
response = connection.request(params)
|
88
121
|
# puts "response: #{response.inspect}\n\n"
|
89
|
-
# puts "response.
|
90
|
-
|
122
|
+
# puts "response.headers:\n#{response.headers}\n"
|
123
|
+
# puts "response.body:'#{response.body}'\n"
|
124
|
+
if response.status.to_s.start_with?('2') && response.body != "{\"result\":[]}\n"
|
91
125
|
result = []
|
92
126
|
if (response.body && response.body.size > 0)
|
93
127
|
result = response.body.split("\n").collect{|b| JSON.parse(b)} rescue []
|
94
128
|
end
|
95
129
|
else
|
96
|
-
logger.error "transcribe_data
|
97
|
-
sleep(
|
130
|
+
logger.error " transcribe_data retrycount(#{retry_count}): status: #{response.status}, response: #{response.body.chomp}"
|
131
|
+
sleep(options[:request_pause].to_i)
|
98
132
|
end
|
99
133
|
rescue StandardError => err
|
100
134
|
#need to do something to retry this - use new a13g func for this.
|
101
|
-
logger.error "transcribe_data retrycount(#{retry_count}): error: #{err.message}"
|
102
|
-
sleep(
|
135
|
+
logger.error " transcribe_data retrycount(#{retry_count}): error: #{err.message}"
|
136
|
+
sleep(options[:request_pause].to_i)
|
103
137
|
end
|
104
138
|
|
105
139
|
end
|
@@ -19,16 +19,13 @@ module GoogleSpeech
|
|
19
19
|
duration
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def trim_and_encode(wav_path, flac_path, start, length, rate)
|
23
23
|
check_local_file(wav_path)
|
24
24
|
|
25
|
-
command = "sox -t wav '#{wav_path}' -
|
26
|
-
|
27
|
-
# command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start} #{length} rate 8000"
|
28
|
-
|
25
|
+
command = "sox -t wav '#{wav_path}' -t wav '#{flac_path}' norm channels 1 rate #{rate} trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0"
|
29
26
|
out, err = run_command(command)
|
30
27
|
response = out + err
|
31
|
-
response.split("\n").each{ |l| raise("
|
28
|
+
response.split("\n").each{ |l| raise("trim_and_encode: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
|
32
29
|
end
|
33
30
|
|
34
31
|
# Pass the command to run, and various options
|
@@ -49,7 +46,7 @@ module GoogleSpeech
|
|
49
46
|
|
50
47
|
cmd = "#{nice}#{command}#{echo_return}"
|
51
48
|
|
52
|
-
logger.info "google_speech - run_command: #{cmd}"
|
49
|
+
# logger.info "google_speech - run_command: #{cmd}"
|
53
50
|
begin
|
54
51
|
result = Timeout::timeout(timeout) {
|
55
52
|
Open3::popen3(cmd) do |i,o,e|
|
data/spec/transcriber_spec.rb
CHANGED
@@ -9,11 +9,10 @@ describe GoogleSpeech::Transcriber do
|
|
9
9
|
|
10
10
|
transcriber = GoogleSpeech::Transcriber.new(f)
|
11
11
|
t = transcriber.transcribe
|
12
|
-
# puts "\n\nt: #{t.inspect}\n\n"
|
13
12
|
|
14
13
|
t.size.must_equal 2
|
15
14
|
t.first[:start_time].must_equal 0
|
16
|
-
t.first[:end_time].must_equal
|
15
|
+
t.first[:end_time].must_equal 4.25
|
17
16
|
t.first[:text].wont_be_nil
|
18
17
|
t.first[:confidence].wont_be_nil
|
19
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_speech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kuklewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: excon
|