google_speech 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +21 -17
- data/lib/google_speech/transcriber.rb +7 -3
- data/lib/google_speech/utility.rb +4 -1
- data/lib/google_speech/version.rb +1 -1
- metadata +4 -4
data/README.md
CHANGED
@@ -26,20 +26,22 @@ Or install it yourself as:
|
|
26
26
|
|
27
27
|
## Usage
|
28
28
|
|
29
|
-
As a gem in
|
29
|
+
As a gem in your code:
|
30
30
|
|
31
|
+
```ruby
|
32
|
+
require 'google_speech'
|
31
33
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
t = transcriber.transcribe
|
34
|
+
f = File.open '/Users/you/Downloads/audio.wav'
|
35
|
+
transcriber = GoogleSpeech::Transcriber.new(f)
|
36
|
+
t = transcriber.transcribe
|
37
|
+
```
|
37
38
|
|
38
39
|
As a command line tool
|
39
40
|
|
40
41
|
> google_speech somefile.wav
|
41
42
|
|
42
|
-
Options:
|
43
|
+
### Options:
|
44
|
+
|
43
45
|
* language - what language is the speech in
|
44
46
|
* chunk_duration - length in seconds for each audio chunk of the wav to send
|
45
47
|
* overlap - chunking does not respect word boundaries; overlap can compensate
|
@@ -47,16 +49,18 @@ Options:
|
|
47
49
|
* request_pause - sleep seconds between chunk transcription requests
|
48
50
|
* profanity_filter - google by default filters profanity; this gem does not.
|
49
51
|
|
50
|
-
Default option values:
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
52
|
+
### Default option values:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
{
|
56
|
+
:language => 'en-US',
|
57
|
+
:chunk_duration => 8,
|
58
|
+
:overlap => 1,
|
59
|
+
:max_results => 2,
|
60
|
+
:request_pause => 1,
|
61
|
+
:profanity_filter => false
|
62
|
+
}
|
63
|
+
```
|
60
64
|
|
61
65
|
## Contributing
|
62
66
|
|
@@ -10,7 +10,7 @@ module GoogleSpeech
|
|
10
10
|
|
11
11
|
DEFAULT_OPTIONS = {
|
12
12
|
:language => 'en-US',
|
13
|
-
:chunk_duration =>
|
13
|
+
:chunk_duration => 5,
|
14
14
|
:overlap => 1,
|
15
15
|
:max_results => 2,
|
16
16
|
:request_pause => 1,
|
@@ -28,12 +28,14 @@ module GoogleSpeech
|
|
28
28
|
chunk_factory.each{ |chunk|
|
29
29
|
result = chunk.to_hash
|
30
30
|
transcript = transcribe_data(chunk.data)
|
31
|
+
next unless transcript
|
32
|
+
# puts "transcript: #{transcript.inspect}\n\n"
|
31
33
|
hypothesis = transcript['hypotheses'].first || Hash.new("")
|
32
34
|
result[:text] = hypothesis['utterance']
|
33
35
|
result[:confidence] = hypothesis['confidence']
|
34
36
|
@results << result
|
35
37
|
|
36
|
-
# puts "\n#{result[:start_time]}
|
38
|
+
# puts "\n#{result[:start_time]}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
|
37
39
|
|
38
40
|
sleep(options[:request_pause].to_i)
|
39
41
|
}
|
@@ -66,8 +68,10 @@ module GoogleSpeech
|
|
66
68
|
while(!result && retry_count < retry_max)
|
67
69
|
connection = Excon.new(url)
|
68
70
|
response = connection.request(params)
|
71
|
+
# puts "response: #{response.inspect}\n\n"
|
69
72
|
if response.status.to_s.start_with?('2')
|
70
|
-
result = JSON.parse(response.body)
|
73
|
+
result = JSON.parse(response.body)
|
74
|
+
# puts "results: #{result.inspect}\n\n"
|
71
75
|
else
|
72
76
|
sleep(1)
|
73
77
|
retry_count += 1
|
@@ -23,7 +23,10 @@ module GoogleSpeech
|
|
23
23
|
def trim_to_flac(wav_path, duration, flac_path, start, length)
|
24
24
|
check_local_file(wav_path)
|
25
25
|
|
26
|
-
command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i}
|
26
|
+
command = "sox -t wav '#{wav_path}' -r 16000 -c 1 -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i} compand .5,2 -80,-80,-75,-50,-30,-15,0,0 norm -0.1"
|
27
|
+
|
28
|
+
# command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i} rate 16k"
|
29
|
+
|
27
30
|
out, err = run_command(command)
|
28
31
|
response = out + err
|
29
32
|
response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_speech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03
|
12
|
+
date: 2013-09-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: excon
|
@@ -97,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
97
|
version: '0'
|
98
98
|
segments:
|
99
99
|
- 0
|
100
|
-
hash:
|
100
|
+
hash: -4222726659936394268
|
101
101
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
102
|
none: false
|
103
103
|
requirements:
|
@@ -106,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
106
|
version: '0'
|
107
107
|
segments:
|
108
108
|
- 0
|
109
|
-
hash:
|
109
|
+
hash: -4222726659936394268
|
110
110
|
requirements: []
|
111
111
|
rubyforge_project:
|
112
112
|
rubygems_version: 1.8.23
|