google_speech 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1e8c0b050e64d5ccc7521777e0b320f5544eff3b
4
- data.tar.gz: 7389a2c17862a3a3e31359b721fa9d2eeb8d3c3b
3
+ metadata.gz: 308bea0bbf8297bbe42f197906b271e789453629
4
+ data.tar.gz: 4829db0fc7ceecf1c91de0b044f108bf3016eea3
5
5
  SHA512:
6
- metadata.gz: 5d33ab4b317a1ef3acaa48e9d05c9e47248a8e5b0fc19de80a32729f0b7a37de0fe0d595ea56d63fefe14a78a41ac7e0fe261e53e54f34b2bc6ccf374e7c869c
7
- data.tar.gz: d2c37ebff2fb005d3fa8a69c860104977481f17b545be69ced5fb3c576d4a2940acf8b0ef074de8f715f2a8477dfd0a2c025e48ddf7be15dd286dade706b19ae
6
+ metadata.gz: 7b536794d1d6ee96227f2a21a3928a11d83583ef982597b872520422510a873bc1cbd431cffe8f4f52a6428a74d54900aa39a2ffffc0d8b054659c5a15704792
7
+ data.tar.gz: 28d88d63f1a08eccd9c36010a7cc0bfe89292487029c96528648529428b02210c9ebe09d26700e436b6157a67f733c35555d1fe10c1776336b928c3b093597c2
@@ -5,16 +5,17 @@ require 'tempfile'
5
5
  module GoogleSpeech
6
6
 
7
7
  class Chunk
8
- attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
8
+ attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file, :rate
9
9
 
10
- def initialize(original_file, original_duration, start_time, duration)
11
- @original_file = original_file
10
+ def initialize(original_file, original_duration, start_time, duration, rate)
11
+ @original_file = original_file
12
12
  @original_duration = original_duration
13
- @start_time = start_time
14
- @duration = [duration, (@original_duration - @start_time)].min
15
- @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
13
+ @start_time = start_time
14
+ @duration = [duration, (@original_duration - @start_time)].min
15
+ @rate = rate
16
+ @chunk_file = Tempfile.new([File.basename(@original_file), '.wav'])
16
17
  # puts "@chunk_file: #{@chunk_file.path}"
17
- Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
18
+ Utility.trim_and_encode(@original_file.path, @chunk_file.path, @start_time, @duration, @rate)
18
19
  end
19
20
 
20
21
  def to_hash
@@ -4,12 +4,13 @@ module GoogleSpeech
4
4
 
5
5
  # break wav audio into short files
6
6
  class ChunkFactory
7
- attr_accessor :original_file, :chunk_duration, :overlap
7
+ attr_accessor :original_file, :chunk_duration, :overlap, :rate
8
8
 
9
- def initialize(original_file, chunk_duration=8, overlap=1)
9
+ def initialize(original_file, chunk_duration, overlap, rate)
10
10
  @chunk_duration = chunk_duration
11
11
  @original_file = original_file
12
12
  @overlap = overlap
13
+ @rate = rate
13
14
  @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path)
14
15
  end
15
16
 
@@ -17,7 +18,7 @@ module GoogleSpeech
17
18
  def each
18
19
  pos = 0
19
20
  while(pos < @original_duration) do
20
- chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
21
+ chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap), @rate)
21
22
  yield chunk
22
23
  pos = pos + [chunk.duration, @chunk_duration].min
23
24
  end
@@ -10,22 +10,27 @@ module GoogleSpeech
10
10
 
11
11
  DEFAULT_OPTIONS = {
12
12
  :key => 'AIzaSyCnl6MRydhw_5fLXIdASxkLJzcJh5iX0M4',
13
- :language => 'en-US',
14
- :chunk_duration => 5,
15
- :overlap => 0.5,
13
+ :client => SecureRandom.hex,
14
+ :audio_type => 'audio/l16',
15
+ :rate => 8000,
16
+ :language => 'en-us',
17
+ :chunk_duration => 4.0,
18
+ :overlap => 0.25,
16
19
  :max_results => 1,
17
- :request_pause => 1,
18
- :profanity_filter => true
20
+ :request_pause => 0.1,
21
+ :profanity_filter => true,
22
+ :retry_max => 3
19
23
  }
20
24
 
21
25
  def initialize(original_file, options=nil)
22
26
  @original_file = original_file
23
27
  @options = DEFAULT_OPTIONS.merge(options || {})
24
28
  @results = []
29
+ @last_ua = 0
25
30
  end
26
31
 
27
32
  def transcribe
28
- chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
33
+ chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap], options[:rate])
29
34
  chunk_factory.each{ |chunk|
30
35
  result = chunk.to_hash
31
36
  transcript = transcribe_data(chunk.data)
@@ -63,43 +68,72 @@ module GoogleSpeech
63
68
  options[:profanity_filter] ? '1' : '0'
64
69
  end
65
70
 
71
+ def user_agent
72
+ ua_strings = [
73
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
74
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
75
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
76
+ 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
77
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36',
78
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
79
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36',
80
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
81
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
82
+ 'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
83
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36',
84
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
85
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
86
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
87
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
88
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
89
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
90
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
91
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
92
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'
93
+ ]
94
+ ua = ua_strings[(@last_ua % ua_strings.length)]
95
+ @last_ua += 1
96
+ ua
97
+ end
98
+
66
99
  def transcribe_data(data)
67
100
  params = {
68
101
  :path => "/speech-api/v2/recognize",
69
- :query => "output=json&client=chromium&lang=#{options[:language]}&key=#{options[:key]}",
102
+ :query => "output=json&key=#{options[:key]}&client=#{options[:client]}&lang=#{options[:language]}",
70
103
  :body => data,
71
104
  :method => 'POST',
72
105
  :headers => {
73
- 'Content-Type' => 'audio/x-flac; rate=8000',
106
+ 'Content-Type' => "#{options[:audio_type]}; rate=#{options[:rate]}",
74
107
  'Content-Length' => data.bytesize,
75
- 'User-Agent' => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36"
108
+ 'User-Agent' => user_agent
76
109
  }
77
110
  }
111
+ # puts "data size: #{data.bytesize}"
78
112
  retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
79
113
  retry_count = 0
80
114
  result = nil
81
- url = "https://www.google.com:443#{params[:path]}"
115
+ url = "https://www.google.com#{params[:path]}"
82
116
  while(!result && retry_count < retry_max)
83
117
  retry_count += 1
84
-
85
118
  begin
86
119
  connection = Excon.new(url)
87
120
  response = connection.request(params)
88
121
  # puts "response: #{response.inspect}\n\n"
89
- # puts "response.body:\nSTART\n#{response.body}\nEND\n#{response.body.class.name}"
90
- if response.status.to_s.start_with?('2')
122
+ # puts "response.headers:\n#{response.headers}\n"
123
+ # puts "response.body:'#{response.body}'\n"
124
+ if response.status.to_s.start_with?('2') && response.body != "{\"result\":[]}\n"
91
125
  result = []
92
126
  if (response.body && response.body.size > 0)
93
127
  result = response.body.split("\n").collect{|b| JSON.parse(b)} rescue []
94
128
  end
95
129
  else
96
- logger.error "transcribe_data response unsuccessful, status: #{response.status}, response: #{response.inspect}"
97
- sleep(1)
130
+ logger.error " transcribe_data retrycount(#{retry_count}): status: #{response.status}, response: #{response.body.chomp}"
131
+ sleep(options[:request_pause].to_i)
98
132
  end
99
133
  rescue StandardError => err
100
134
  #need to do something to retry this - use new a13g func for this.
101
- logger.error "transcribe_data retrycount(#{retry_count}): error: #{err.message}"
102
- sleep(1)
135
+ logger.error " transcribe_data retrycount(#{retry_count}): error: #{err.message}"
136
+ sleep(options[:request_pause].to_i)
103
137
  end
104
138
 
105
139
  end
@@ -19,16 +19,13 @@ module GoogleSpeech
19
19
  duration
20
20
  end
21
21
 
22
- def trim_to_flac(wav_path, duration, flac_path, start, length)
22
+ def trim_and_encode(wav_path, flac_path, start, length, rate)
23
23
  check_local_file(wav_path)
24
24
 
25
- command = "sox -t wav '#{wav_path}' -r 8000 -c 1 -t flac '#{flac_path}' trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0 norm -0.1"
26
-
27
- # command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start} #{length} rate 8000"
28
-
25
+ command = "sox -t wav '#{wav_path}' -t wav '#{flac_path}' norm channels 1 rate #{rate} trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0"
29
26
  out, err = run_command(command)
30
27
  response = out + err
31
- response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
28
+ response.split("\n").each{ |l| raise("trim_and_encode: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
32
29
  end
33
30
 
34
31
  # Pass the command to run, and various options
@@ -49,7 +46,7 @@ module GoogleSpeech
49
46
 
50
47
  cmd = "#{nice}#{command}#{echo_return}"
51
48
 
52
- logger.info "google_speech - run_command: #{cmd}"
49
+ # logger.info "google_speech - run_command: #{cmd}"
53
50
  begin
54
51
  result = Timeout::timeout(timeout) {
55
52
  Open3::popen3(cmd) do |i,o,e|
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module GoogleSpeech
4
- VERSION = "0.3.0"
4
+ VERSION = "0.3.1"
5
5
  end
@@ -9,11 +9,10 @@ describe GoogleSpeech::Transcriber do
9
9
 
10
10
  transcriber = GoogleSpeech::Transcriber.new(f)
11
11
  t = transcriber.transcribe
12
- # puts "\n\nt: #{t.inspect}\n\n"
13
12
 
14
13
  t.size.must_equal 2
15
14
  t.first[:start_time].must_equal 0
16
- t.first[:end_time].must_equal 5.5
15
+ t.first[:end_time].must_equal 4.25
17
16
  t.first[:text].wont_be_nil
18
17
  t.first[:confidence].wont_be_nil
19
18
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kuklewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-07 00:00:00.000000000 Z
11
+ date: 2014-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: excon