google_speech 0.3.0 → 0.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1e8c0b050e64d5ccc7521777e0b320f5544eff3b
4
- data.tar.gz: 7389a2c17862a3a3e31359b721fa9d2eeb8d3c3b
3
+ metadata.gz: 308bea0bbf8297bbe42f197906b271e789453629
4
+ data.tar.gz: 4829db0fc7ceecf1c91de0b044f108bf3016eea3
5
5
  SHA512:
6
- metadata.gz: 5d33ab4b317a1ef3acaa48e9d05c9e47248a8e5b0fc19de80a32729f0b7a37de0fe0d595ea56d63fefe14a78a41ac7e0fe261e53e54f34b2bc6ccf374e7c869c
7
- data.tar.gz: d2c37ebff2fb005d3fa8a69c860104977481f17b545be69ced5fb3c576d4a2940acf8b0ef074de8f715f2a8477dfd0a2c025e48ddf7be15dd286dade706b19ae
6
+ metadata.gz: 7b536794d1d6ee96227f2a21a3928a11d83583ef982597b872520422510a873bc1cbd431cffe8f4f52a6428a74d54900aa39a2ffffc0d8b054659c5a15704792
7
+ data.tar.gz: 28d88d63f1a08eccd9c36010a7cc0bfe89292487029c96528648529428b02210c9ebe09d26700e436b6157a67f733c35555d1fe10c1776336b928c3b093597c2
@@ -5,16 +5,17 @@ require 'tempfile'
5
5
  module GoogleSpeech
6
6
 
7
7
  class Chunk
8
- attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
8
+ attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file, :rate
9
9
 
10
- def initialize(original_file, original_duration, start_time, duration)
11
- @original_file = original_file
10
+ def initialize(original_file, original_duration, start_time, duration, rate)
11
+ @original_file = original_file
12
12
  @original_duration = original_duration
13
- @start_time = start_time
14
- @duration = [duration, (@original_duration - @start_time)].min
15
- @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
13
+ @start_time = start_time
14
+ @duration = [duration, (@original_duration - @start_time)].min
15
+ @rate = rate
16
+ @chunk_file = Tempfile.new([File.basename(@original_file), '.wav'])
16
17
  # puts "@chunk_file: #{@chunk_file.path}"
17
- Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
18
+ Utility.trim_and_encode(@original_file.path, @chunk_file.path, @start_time, @duration, @rate)
18
19
  end
19
20
 
20
21
  def to_hash
@@ -4,12 +4,13 @@ module GoogleSpeech
4
4
 
5
5
  # break wav audio into short files
6
6
  class ChunkFactory
7
- attr_accessor :original_file, :chunk_duration, :overlap
7
+ attr_accessor :original_file, :chunk_duration, :overlap, :rate
8
8
 
9
- def initialize(original_file, chunk_duration=8, overlap=1)
9
+ def initialize(original_file, chunk_duration, overlap, rate)
10
10
  @chunk_duration = chunk_duration
11
11
  @original_file = original_file
12
12
  @overlap = overlap
13
+ @rate = rate
13
14
  @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path)
14
15
  end
15
16
 
@@ -17,7 +18,7 @@ module GoogleSpeech
17
18
  def each
18
19
  pos = 0
19
20
  while(pos < @original_duration) do
20
- chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
21
+ chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap), @rate)
21
22
  yield chunk
22
23
  pos = pos + [chunk.duration, @chunk_duration].min
23
24
  end
@@ -10,22 +10,27 @@ module GoogleSpeech
10
10
 
11
11
  DEFAULT_OPTIONS = {
12
12
  :key => 'AIzaSyCnl6MRydhw_5fLXIdASxkLJzcJh5iX0M4',
13
- :language => 'en-US',
14
- :chunk_duration => 5,
15
- :overlap => 0.5,
13
+ :client => SecureRandom.hex,
14
+ :audio_type => 'audio/l16',
15
+ :rate => 8000,
16
+ :language => 'en-us',
17
+ :chunk_duration => 4.0,
18
+ :overlap => 0.25,
16
19
  :max_results => 1,
17
- :request_pause => 1,
18
- :profanity_filter => true
20
+ :request_pause => 0.1,
21
+ :profanity_filter => true,
22
+ :retry_max => 3
19
23
  }
20
24
 
21
25
  def initialize(original_file, options=nil)
22
26
  @original_file = original_file
23
27
  @options = DEFAULT_OPTIONS.merge(options || {})
24
28
  @results = []
29
+ @last_ua = 0
25
30
  end
26
31
 
27
32
  def transcribe
28
- chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
33
+ chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap], options[:rate])
29
34
  chunk_factory.each{ |chunk|
30
35
  result = chunk.to_hash
31
36
  transcript = transcribe_data(chunk.data)
@@ -63,43 +68,72 @@ module GoogleSpeech
63
68
  options[:profanity_filter] ? '1' : '0'
64
69
  end
65
70
 
71
+ def user_agent
72
+ ua_strings = [
73
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
74
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
75
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
76
+ 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
77
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36',
78
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
79
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36',
80
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
81
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
82
+ 'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
83
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36',
84
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
85
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
86
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
87
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
88
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
89
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
90
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
91
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
92
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'
93
+ ]
94
+ ua = ua_strings[(@last_ua % ua_strings.length)]
95
+ @last_ua += 1
96
+ ua
97
+ end
98
+
66
99
  def transcribe_data(data)
67
100
  params = {
68
101
  :path => "/speech-api/v2/recognize",
69
- :query => "output=json&client=chromium&lang=#{options[:language]}&key=#{options[:key]}",
102
+ :query => "output=json&key=#{options[:key]}&client=#{options[:client]}&lang=#{options[:language]}",
70
103
  :body => data,
71
104
  :method => 'POST',
72
105
  :headers => {
73
- 'Content-Type' => 'audio/x-flac; rate=8000',
106
+ 'Content-Type' => "#{options[:audio_type]}; rate=#{options[:rate]}",
74
107
  'Content-Length' => data.bytesize,
75
- 'User-Agent' => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36"
108
+ 'User-Agent' => user_agent
76
109
  }
77
110
  }
111
+ # puts "data size: #{data.bytesize}"
78
112
  retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
79
113
  retry_count = 0
80
114
  result = nil
81
- url = "https://www.google.com:443#{params[:path]}"
115
+ url = "https://www.google.com#{params[:path]}"
82
116
  while(!result && retry_count < retry_max)
83
117
  retry_count += 1
84
-
85
118
  begin
86
119
  connection = Excon.new(url)
87
120
  response = connection.request(params)
88
121
  # puts "response: #{response.inspect}\n\n"
89
- # puts "response.body:\nSTART\n#{response.body}\nEND\n#{response.body.class.name}"
90
- if response.status.to_s.start_with?('2')
122
+ # puts "response.headers:\n#{response.headers}\n"
123
+ # puts "response.body:'#{response.body}'\n"
124
+ if response.status.to_s.start_with?('2') && response.body != "{\"result\":[]}\n"
91
125
  result = []
92
126
  if (response.body && response.body.size > 0)
93
127
  result = response.body.split("\n").collect{|b| JSON.parse(b)} rescue []
94
128
  end
95
129
  else
96
- logger.error "transcribe_data response unsuccessful, status: #{response.status}, response: #{response.inspect}"
97
- sleep(1)
130
+ logger.error " transcribe_data retrycount(#{retry_count}): status: #{response.status}, response: #{response.body.chomp}"
131
+ sleep(options[:request_pause].to_i)
98
132
  end
99
133
  rescue StandardError => err
100
134
  #need to do something to retry this - use new a13g func for this.
101
- logger.error "transcribe_data retrycount(#{retry_count}): error: #{err.message}"
102
- sleep(1)
135
+ logger.error " transcribe_data retrycount(#{retry_count}): error: #{err.message}"
136
+ sleep(options[:request_pause].to_i)
103
137
  end
104
138
 
105
139
  end
@@ -19,16 +19,13 @@ module GoogleSpeech
19
19
  duration
20
20
  end
21
21
 
22
- def trim_to_flac(wav_path, duration, flac_path, start, length)
22
+ def trim_and_encode(wav_path, flac_path, start, length, rate)
23
23
  check_local_file(wav_path)
24
24
 
25
- command = "sox -t wav '#{wav_path}' -r 8000 -c 1 -t flac '#{flac_path}' trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0 norm -0.1"
26
-
27
- # command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start} #{length} rate 8000"
28
-
25
+ command = "sox -t wav '#{wav_path}' -t wav '#{flac_path}' norm channels 1 rate #{rate} trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0"
29
26
  out, err = run_command(command)
30
27
  response = out + err
31
- response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
28
+ response.split("\n").each{ |l| raise("trim_and_encode: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
32
29
  end
33
30
 
34
31
  # Pass the command to run, and various options
@@ -49,7 +46,7 @@ module GoogleSpeech
49
46
 
50
47
  cmd = "#{nice}#{command}#{echo_return}"
51
48
 
52
- logger.info "google_speech - run_command: #{cmd}"
49
+ # logger.info "google_speech - run_command: #{cmd}"
53
50
  begin
54
51
  result = Timeout::timeout(timeout) {
55
52
  Open3::popen3(cmd) do |i,o,e|
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module GoogleSpeech
4
- VERSION = "0.3.0"
4
+ VERSION = "0.3.1"
5
5
  end
@@ -9,11 +9,10 @@ describe GoogleSpeech::Transcriber do
9
9
 
10
10
  transcriber = GoogleSpeech::Transcriber.new(f)
11
11
  t = transcriber.transcribe
12
- # puts "\n\nt: #{t.inspect}\n\n"
13
12
 
14
13
  t.size.must_equal 2
15
14
  t.first[:start_time].must_equal 0
16
- t.first[:end_time].must_equal 5.5
15
+ t.first[:end_time].must_equal 4.25
17
16
  t.first[:text].wont_be_nil
18
17
  t.first[:confidence].wont_be_nil
19
18
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kuklewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-07 00:00:00.000000000 Z
11
+ date: 2014-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: excon