google_speech 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google_speech/chunk.rb +8 -7
- data/lib/google_speech/chunk_factory.rb +4 -3
- data/lib/google_speech/transcriber.rb +51 -17
- data/lib/google_speech/utility.rb +4 -7
- data/lib/google_speech/version.rb +1 -1
- data/spec/transcriber_spec.rb +1 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 308bea0bbf8297bbe42f197906b271e789453629
|
4
|
+
data.tar.gz: 4829db0fc7ceecf1c91de0b044f108bf3016eea3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b536794d1d6ee96227f2a21a3928a11d83583ef982597b872520422510a873bc1cbd431cffe8f4f52a6428a74d54900aa39a2ffffc0d8b054659c5a15704792
|
7
|
+
data.tar.gz: 28d88d63f1a08eccd9c36010a7cc0bfe89292487029c96528648529428b02210c9ebe09d26700e436b6157a67f733c35555d1fe10c1776336b928c3b093597c2
|
data/lib/google_speech/chunk.rb
CHANGED
@@ -5,16 +5,17 @@ require 'tempfile'
|
|
5
5
|
module GoogleSpeech
|
6
6
|
|
7
7
|
class Chunk
|
8
|
-
attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
|
8
|
+
attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file, :rate
|
9
9
|
|
10
|
-
def initialize(original_file, original_duration, start_time, duration)
|
11
|
-
@original_file
|
10
|
+
def initialize(original_file, original_duration, start_time, duration, rate)
|
11
|
+
@original_file = original_file
|
12
12
|
@original_duration = original_duration
|
13
|
-
@start_time
|
14
|
-
@duration
|
15
|
-
@
|
13
|
+
@start_time = start_time
|
14
|
+
@duration = [duration, (@original_duration - @start_time)].min
|
15
|
+
@rate = rate
|
16
|
+
@chunk_file = Tempfile.new([File.basename(@original_file), '.wav'])
|
16
17
|
# puts "@chunk_file: #{@chunk_file.path}"
|
17
|
-
Utility.
|
18
|
+
Utility.trim_and_encode(@original_file.path, @chunk_file.path, @start_time, @duration, @rate)
|
18
19
|
end
|
19
20
|
|
20
21
|
def to_hash
|
@@ -4,12 +4,13 @@ module GoogleSpeech
|
|
4
4
|
|
5
5
|
# break wav audio into short files
|
6
6
|
class ChunkFactory
|
7
|
-
attr_accessor :original_file, :chunk_duration, :overlap
|
7
|
+
attr_accessor :original_file, :chunk_duration, :overlap, :rate
|
8
8
|
|
9
|
-
def initialize(original_file, chunk_duration
|
9
|
+
def initialize(original_file, chunk_duration, overlap, rate)
|
10
10
|
@chunk_duration = chunk_duration
|
11
11
|
@original_file = original_file
|
12
12
|
@overlap = overlap
|
13
|
+
@rate = rate
|
13
14
|
@original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path)
|
14
15
|
end
|
15
16
|
|
@@ -17,7 +18,7 @@ module GoogleSpeech
|
|
17
18
|
def each
|
18
19
|
pos = 0
|
19
20
|
while(pos < @original_duration) do
|
20
|
-
chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
|
21
|
+
chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap), @rate)
|
21
22
|
yield chunk
|
22
23
|
pos = pos + [chunk.duration, @chunk_duration].min
|
23
24
|
end
|
@@ -10,22 +10,27 @@ module GoogleSpeech
|
|
10
10
|
|
11
11
|
DEFAULT_OPTIONS = {
|
12
12
|
:key => 'AIzaSyCnl6MRydhw_5fLXIdASxkLJzcJh5iX0M4',
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
13
|
+
:client => SecureRandom.hex,
|
14
|
+
:audio_type => 'audio/l16',
|
15
|
+
:rate => 8000,
|
16
|
+
:language => 'en-us',
|
17
|
+
:chunk_duration => 4.0,
|
18
|
+
:overlap => 0.25,
|
16
19
|
:max_results => 1,
|
17
|
-
:request_pause => 1,
|
18
|
-
:profanity_filter => true
|
20
|
+
:request_pause => 0.1,
|
21
|
+
:profanity_filter => true,
|
22
|
+
:retry_max => 3
|
19
23
|
}
|
20
24
|
|
21
25
|
def initialize(original_file, options=nil)
|
22
26
|
@original_file = original_file
|
23
27
|
@options = DEFAULT_OPTIONS.merge(options || {})
|
24
28
|
@results = []
|
29
|
+
@last_ua = 0
|
25
30
|
end
|
26
31
|
|
27
32
|
def transcribe
|
28
|
-
chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
|
33
|
+
chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap], options[:rate])
|
29
34
|
chunk_factory.each{ |chunk|
|
30
35
|
result = chunk.to_hash
|
31
36
|
transcript = transcribe_data(chunk.data)
|
@@ -63,43 +68,72 @@ module GoogleSpeech
|
|
63
68
|
options[:profanity_filter] ? '1' : '0'
|
64
69
|
end
|
65
70
|
|
71
|
+
def user_agent
|
72
|
+
ua_strings = [
|
73
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
|
74
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
|
75
|
+
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
|
76
|
+
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
|
77
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36',
|
78
|
+
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
|
79
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36',
|
80
|
+
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
|
81
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
|
82
|
+
'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
|
83
|
+
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36',
|
84
|
+
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
|
85
|
+
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
|
86
|
+
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
|
87
|
+
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
88
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
89
|
+
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
90
|
+
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
91
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
|
92
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'
|
93
|
+
]
|
94
|
+
ua = ua_strings[(@last_ua % ua_strings.length)]
|
95
|
+
@last_ua += 1
|
96
|
+
ua
|
97
|
+
end
|
98
|
+
|
66
99
|
def transcribe_data(data)
|
67
100
|
params = {
|
68
101
|
:path => "/speech-api/v2/recognize",
|
69
|
-
:query => "output=json&client
|
102
|
+
:query => "output=json&key=#{options[:key]}&client=#{options[:client]}&lang=#{options[:language]}",
|
70
103
|
:body => data,
|
71
104
|
:method => 'POST',
|
72
105
|
:headers => {
|
73
|
-
'Content-Type' =>
|
106
|
+
'Content-Type' => "#{options[:audio_type]}; rate=#{options[:rate]}",
|
74
107
|
'Content-Length' => data.bytesize,
|
75
|
-
'User-Agent' =>
|
108
|
+
'User-Agent' => user_agent
|
76
109
|
}
|
77
110
|
}
|
111
|
+
# puts "data size: #{data.bytesize}"
|
78
112
|
retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
|
79
113
|
retry_count = 0
|
80
114
|
result = nil
|
81
|
-
url = "https://www.google.com
|
115
|
+
url = "https://www.google.com#{params[:path]}"
|
82
116
|
while(!result && retry_count < retry_max)
|
83
117
|
retry_count += 1
|
84
|
-
|
85
118
|
begin
|
86
119
|
connection = Excon.new(url)
|
87
120
|
response = connection.request(params)
|
88
121
|
# puts "response: #{response.inspect}\n\n"
|
89
|
-
# puts "response.
|
90
|
-
|
122
|
+
# puts "response.headers:\n#{response.headers}\n"
|
123
|
+
# puts "response.body:'#{response.body}'\n"
|
124
|
+
if response.status.to_s.start_with?('2') && response.body != "{\"result\":[]}\n"
|
91
125
|
result = []
|
92
126
|
if (response.body && response.body.size > 0)
|
93
127
|
result = response.body.split("\n").collect{|b| JSON.parse(b)} rescue []
|
94
128
|
end
|
95
129
|
else
|
96
|
-
logger.error "transcribe_data
|
97
|
-
sleep(
|
130
|
+
logger.error " transcribe_data retrycount(#{retry_count}): status: #{response.status}, response: #{response.body.chomp}"
|
131
|
+
sleep(options[:request_pause].to_i)
|
98
132
|
end
|
99
133
|
rescue StandardError => err
|
100
134
|
#need to do something to retry this - use new a13g func for this.
|
101
|
-
logger.error "transcribe_data retrycount(#{retry_count}): error: #{err.message}"
|
102
|
-
sleep(
|
135
|
+
logger.error " transcribe_data retrycount(#{retry_count}): error: #{err.message}"
|
136
|
+
sleep(options[:request_pause].to_i)
|
103
137
|
end
|
104
138
|
|
105
139
|
end
|
@@ -19,16 +19,13 @@ module GoogleSpeech
|
|
19
19
|
duration
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def trim_and_encode(wav_path, flac_path, start, length, rate)
|
23
23
|
check_local_file(wav_path)
|
24
24
|
|
25
|
-
command = "sox -t wav '#{wav_path}' -
|
26
|
-
|
27
|
-
# command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start} #{length} rate 8000"
|
28
|
-
|
25
|
+
command = "sox -t wav '#{wav_path}' -t wav '#{flac_path}' norm channels 1 rate #{rate} trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0"
|
29
26
|
out, err = run_command(command)
|
30
27
|
response = out + err
|
31
|
-
response.split("\n").each{ |l| raise("
|
28
|
+
response.split("\n").each{ |l| raise("trim_and_encode: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
|
32
29
|
end
|
33
30
|
|
34
31
|
# Pass the command to run, and various options
|
@@ -49,7 +46,7 @@ module GoogleSpeech
|
|
49
46
|
|
50
47
|
cmd = "#{nice}#{command}#{echo_return}"
|
51
48
|
|
52
|
-
logger.info "google_speech - run_command: #{cmd}"
|
49
|
+
# logger.info "google_speech - run_command: #{cmd}"
|
53
50
|
begin
|
54
51
|
result = Timeout::timeout(timeout) {
|
55
52
|
Open3::popen3(cmd) do |i,o,e|
|
data/spec/transcriber_spec.rb
CHANGED
@@ -9,11 +9,10 @@ describe GoogleSpeech::Transcriber do
|
|
9
9
|
|
10
10
|
transcriber = GoogleSpeech::Transcriber.new(f)
|
11
11
|
t = transcriber.transcribe
|
12
|
-
# puts "\n\nt: #{t.inspect}\n\n"
|
13
12
|
|
14
13
|
t.size.must_equal 2
|
15
14
|
t.first[:start_time].must_equal 0
|
16
|
-
t.first[:end_time].must_equal
|
15
|
+
t.first[:end_time].must_equal 4.25
|
17
16
|
t.first[:text].wont_be_nil
|
18
17
|
t.first[:confidence].wont_be_nil
|
19
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_speech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kuklewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: excon
|