att_speech 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +5 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +58 -0
- data/README.md +32 -7
- data/Rakefile +3 -3
- data/VERSION +1 -1
- data/att_speech.gemspec +87 -0
- data/examples/bostonSeltics.wav +0 -0
- data/examples/example.rb +53 -0
- data/examples/helloWorld.txt +1 -0
- data/lib/att_speech/att_speech.rb +191 -156
- data/lib/att_speech/version.rb +2 -2
- data/spec/att_speech_spec.rb +52 -33
- metadata +25 -10
data/.travis.yml
ADDED
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (3.2.13)
|
5
|
+
i18n (= 0.6.1)
|
6
|
+
multi_json (~> 1.0)
|
7
|
+
celluloid (0.13.0)
|
8
|
+
timers (>= 1.0.0)
|
9
|
+
diff-lcs (1.2.4)
|
10
|
+
fakeweb (1.3.0)
|
11
|
+
faraday (0.8.7)
|
12
|
+
multipart-post (~> 1.1)
|
13
|
+
git (1.2.5)
|
14
|
+
hashie (2.0.4)
|
15
|
+
i18n (0.6.1)
|
16
|
+
jeweler (1.8.4)
|
17
|
+
bundler (~> 1.0)
|
18
|
+
git (>= 1.2.5)
|
19
|
+
rake
|
20
|
+
rdoc
|
21
|
+
json (1.7.7)
|
22
|
+
json (1.7.7-java)
|
23
|
+
multi_json (1.7.3)
|
24
|
+
multipart-post (1.2.0)
|
25
|
+
rake (10.0.4)
|
26
|
+
rdoc (4.0.1)
|
27
|
+
json (~> 1.4)
|
28
|
+
rspec (2.13.0)
|
29
|
+
rspec-core (~> 2.13.0)
|
30
|
+
rspec-expectations (~> 2.13.0)
|
31
|
+
rspec-mocks (~> 2.13.0)
|
32
|
+
rspec-core (2.13.1)
|
33
|
+
rspec-expectations (2.13.0)
|
34
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
35
|
+
rspec-mocks (2.13.1)
|
36
|
+
simplecov (0.7.1)
|
37
|
+
multi_json (~> 1.0)
|
38
|
+
simplecov-html (~> 0.7.1)
|
39
|
+
simplecov-html (0.7.1)
|
40
|
+
timers (1.1.0)
|
41
|
+
yard (0.8.6.1)
|
42
|
+
|
43
|
+
PLATFORMS
|
44
|
+
java
|
45
|
+
ruby
|
46
|
+
|
47
|
+
DEPENDENCIES
|
48
|
+
activesupport
|
49
|
+
bundler (>= 1.0.0)
|
50
|
+
celluloid (>= 0.11.1, < 0.14.0)
|
51
|
+
fakeweb
|
52
|
+
faraday (>= 0.8.1)
|
53
|
+
hashie (>= 1.2.0)
|
54
|
+
jeweler (>= 1.8.4)
|
55
|
+
rdoc (>= 3.12)
|
56
|
+
rspec (>= 2.8.0)
|
57
|
+
simplecov
|
58
|
+
yard (>= 0.7)
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
![Build Status](https://secure.travis-ci.org/jsgoecke/att_speech.png)
|
4
4
|
|
5
|
-
A Ruby library for consuming the AT&T [Speech API](https://developer.att.com/developer/apiDetailPage.jsp?passedItemId=10700023) for speech to text. API details may be found [here](
|
5
|
+
A Ruby library for consuming the AT&T [Speech API](https://developer.att.com/developer/apiDetailPage.jsp?passedItemId=10700023) for speech to text. API details may be found [here](https://developer.att.com/developer/basicTemplate.jsp?passedItemId=13100102&api=Speech&version=3).
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -15,8 +15,9 @@ gem install att_speech
|
|
15
15
|
```ruby
|
16
16
|
require 'att_speech'
|
17
17
|
|
18
|
-
att_speech = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
19
|
-
:secret_key => ENV['ATT_SPEECH_SECRET']
|
18
|
+
att_speech = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
19
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
20
|
+
:scope => 'SPEECH' })
|
20
21
|
|
21
22
|
# Read the audio file contents
|
22
23
|
file_contents = File.read(File.expand_path(File.dirname(File.dirname(__FILE__))) + "/bostonSeltics.wav")
|
@@ -30,15 +31,39 @@ future = att_speech.future(:speech_to_text, file_contents, type='audio/wav')
|
|
30
31
|
p future.value
|
31
32
|
|
32
33
|
# Non-blocking operation that will call a block when the transcription is returned
|
33
|
-
# Note: Remember, this is a concurrent operation so don't pass self and avoid mutable objects in the block
|
34
|
-
# from the calling context, better to have discreet actions contained in the block, such as inserting in a
|
34
|
+
# Note: Remember, this is a concurrent operation so don't pass self and avoid mutable objects in the block
|
35
|
+
# from the calling context, better to have discrete actions contained in the block, such as inserting in a
|
35
36
|
# datastore
|
36
37
|
sleep 2
|
37
38
|
att_speech.speech_to_text!(file_contents) { |transcription| p transcription }
|
38
39
|
sleep 5
|
40
|
+
|
41
|
+
|
42
|
+
def write_wav_file(audio_bytes)
|
43
|
+
file_name = "ret_audio-#{Time.now.strftime('%Y%m%d-%H%M%S')}.wav"
|
44
|
+
full_file_name = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', file_name))
|
45
|
+
audio_file = File.open(full_file_name, "w")
|
46
|
+
audio_file << audio_bytes
|
47
|
+
audio_file.close
|
48
|
+
end
|
49
|
+
|
50
|
+
att_text = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
51
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
52
|
+
:scope => 'TTS' })
|
53
|
+
|
54
|
+
# Read the text file contents
|
55
|
+
tfp = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', 'helloWorld.txt'))
|
56
|
+
txt_contents = File.read(tfp)
|
57
|
+
|
58
|
+
audio = att_text.text_to_speech(txt_contents)
|
59
|
+
write_wav_file(audio)
|
60
|
+
|
61
|
+
# Non-blocking operation with a future, if you have a longer file that requires more processing time
|
62
|
+
sleep 2
|
63
|
+
future = att_text.future(:text_to_speech, "This is a hello world.", type='text/plain')
|
64
|
+
write_wav_file(future.value)
|
39
65
|
```
|
40
66
|
|
41
67
|
## Copyright
|
42
68
|
|
43
|
-
Copyright (c)
|
44
|
-
|
69
|
+
Copyright (c) 2013 Jason Goecke. See LICENSE.txt for further details.
|
data/Rakefile
CHANGED
@@ -17,10 +17,10 @@ Jeweler::Tasks.new do |gem|
|
|
17
17
|
gem.name = "att_speech"
|
18
18
|
gem.homepage = "http://github.com/jsgoecke/att_speech"
|
19
19
|
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{
|
21
|
-
gem.description = %Q{
|
20
|
+
gem.summary = %Q{A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/forward.jsp?passedItemId=12500023 for speech->text, and text->speech.}
|
21
|
+
gem.description = %Q{A Ruby library for consuming v3 of the AT&T Speech API for speech->text, and text->speech. Takes in either .wav or specific other audio files, and returns a text string of the spoken words. Can also take in either a text string or .txt file and returns a string of bytes from which a .wav file can be created of the spoken text.}
|
22
22
|
gem.email = "jason@goecke.net"
|
23
|
-
gem.authors = ["Jason Goecke"]
|
23
|
+
gem.authors = ["Jason Goecke, Peter Wilson"]
|
24
24
|
# dependencies defined in Gemfile
|
25
25
|
end
|
26
26
|
Jeweler::RubygemsDotOrgTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.4
|
data/att_speech.gemspec
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "att_speech"
|
8
|
+
s.version = "0.0.4"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Jason Goecke, Peter Wilson"]
|
12
|
+
s.date = "2013-05-09"
|
13
|
+
s.description = "A Ruby library for consuming v3 of the AT&T Speech API for speech->text, and text->speech. Takes in either .wav or specific other audio files, and returns a text string of the spoken words. Can also take in either a text string or .txt file and returns a string of bytes from which a .wav file can be created of the spoken text."
|
14
|
+
s.email = "jason@goecke.net"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.md"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
".travis.yml",
|
23
|
+
"Gemfile",
|
24
|
+
"Gemfile.lock",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.md",
|
27
|
+
"Rakefile",
|
28
|
+
"VERSION",
|
29
|
+
"att_speech.gemspec",
|
30
|
+
"examples/bostonSeltics.wav",
|
31
|
+
"examples/example.rb",
|
32
|
+
"examples/helloWorld.txt",
|
33
|
+
"lib/att_speech.rb",
|
34
|
+
"lib/att_speech/att_speech.rb",
|
35
|
+
"lib/att_speech/version.rb",
|
36
|
+
"spec/att_speech_spec.rb",
|
37
|
+
"spec/spec_helper.rb"
|
38
|
+
]
|
39
|
+
s.homepage = "http://github.com/jsgoecke/att_speech"
|
40
|
+
s.licenses = ["MIT"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = "1.8.24"
|
43
|
+
s.summary = "A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/forward.jsp?passedItemId=12500023 for speech->text, and text->speech."
|
44
|
+
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<faraday>, [">= 0.8.1"])
|
50
|
+
s.add_runtime_dependency(%q<celluloid>, ["< 0.14.0", ">= 0.11.1"])
|
51
|
+
s.add_runtime_dependency(%q<hashie>, [">= 1.2.0"])
|
52
|
+
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
53
|
+
s.add_development_dependency(%q<rspec>, [">= 2.8.0"])
|
54
|
+
s.add_development_dependency(%q<yard>, [">= 0.7"])
|
55
|
+
s.add_development_dependency(%q<rdoc>, [">= 3.12"])
|
56
|
+
s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
|
57
|
+
s.add_development_dependency(%q<jeweler>, [">= 1.8.4"])
|
58
|
+
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
59
|
+
s.add_development_dependency(%q<fakeweb>, [">= 0"])
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<faraday>, [">= 0.8.1"])
|
62
|
+
s.add_dependency(%q<celluloid>, ["< 0.14.0", ">= 0.11.1"])
|
63
|
+
s.add_dependency(%q<hashie>, [">= 1.2.0"])
|
64
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
65
|
+
s.add_dependency(%q<rspec>, [">= 2.8.0"])
|
66
|
+
s.add_dependency(%q<yard>, [">= 0.7"])
|
67
|
+
s.add_dependency(%q<rdoc>, [">= 3.12"])
|
68
|
+
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
69
|
+
s.add_dependency(%q<jeweler>, [">= 1.8.4"])
|
70
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
71
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
72
|
+
end
|
73
|
+
else
|
74
|
+
s.add_dependency(%q<faraday>, [">= 0.8.1"])
|
75
|
+
s.add_dependency(%q<celluloid>, ["< 0.14.0", ">= 0.11.1"])
|
76
|
+
s.add_dependency(%q<hashie>, [">= 1.2.0"])
|
77
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
78
|
+
s.add_dependency(%q<rspec>, [">= 2.8.0"])
|
79
|
+
s.add_dependency(%q<yard>, [">= 0.7"])
|
80
|
+
s.add_dependency(%q<rdoc>, [">= 3.12"])
|
81
|
+
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
82
|
+
s.add_dependency(%q<jeweler>, [">= 1.8.4"])
|
83
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
84
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
Binary file
|
data/examples/example.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
$LOAD_PATH << './lib'
|
2
|
+
|
3
|
+
lp = File.join(File.dirname(File.dirname(__FILE__)), 'lib', 'att_speech.rb')
|
4
|
+
require lp
|
5
|
+
|
6
|
+
att_speech = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
7
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
8
|
+
:scope => 'SPEECH' })
|
9
|
+
|
10
|
+
# Read the audio file contents
|
11
|
+
fp = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', 'bostonSeltics.wav'))
|
12
|
+
file_contents = File.read(fp)
|
13
|
+
|
14
|
+
# Blocking operation
|
15
|
+
p att_speech.speech_to_text(file_contents, type='audio/wav')
|
16
|
+
|
17
|
+
# Non-blocking operation with a future, if you have a longer file that requires more processing time
|
18
|
+
sleep 2
|
19
|
+
future = att_speech.future(:speech_to_text, file_contents, type='audio/wav')
|
20
|
+
p future.value
|
21
|
+
|
22
|
+
# Non-blocking operation that will call a block when the transcription is returned
|
23
|
+
# Note: Remember, this is a concurrent operation so don't pass self and avoid mutable objects in the block
|
24
|
+
# from the calling context, better to have discrete actions contained in the block, such as inserting in a
|
25
|
+
# datastore
|
26
|
+
sleep 2
|
27
|
+
att_speech.speech_to_text!(file_contents) { |transcription| p transcription }
|
28
|
+
sleep 5
|
29
|
+
|
30
|
+
|
31
|
+
def write_wav_file(audio_bytes)
|
32
|
+
file_name = "ret_audio-#{Time.now.strftime('%Y%m%d-%H%M%S')}.wav"
|
33
|
+
full_file_name = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', file_name))
|
34
|
+
audio_file = File.open(full_file_name, "w")
|
35
|
+
audio_file << audio_bytes
|
36
|
+
audio_file.close
|
37
|
+
end
|
38
|
+
|
39
|
+
att_text = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
40
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
41
|
+
:scope => 'TTS' })
|
42
|
+
|
43
|
+
# Read the text file contents
|
44
|
+
tfp = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', 'helloWorld.txt'))
|
45
|
+
txt_contents = File.read(tfp)
|
46
|
+
|
47
|
+
audio = att_text.text_to_speech(txt_contents)
|
48
|
+
write_wav_file(audio)
|
49
|
+
|
50
|
+
# Non-blocking operation with a future, if you have a longer file that requires more processing time
|
51
|
+
sleep 2
|
52
|
+
future = att_text.future(:text_to_speech, "This is a hello world.", type='text/plain')
|
53
|
+
write_wav_file(future.value)
|
@@ -0,0 +1 @@
|
|
1
|
+
Hello World!
|
@@ -1,157 +1,192 @@
|
|
1
1
|
class ATTSpeech
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
2
|
+
include Celluloid
|
3
|
+
Celluloid.logger = nil
|
4
|
+
|
5
|
+
attr_reader :api_key, :secret_key, :access_token, :refresh_token, :base_url, :ssl_verify, :scope
|
6
|
+
|
7
|
+
##
|
8
|
+
# Creates an ATTSpeech object
|
9
|
+
#
|
10
|
+
# @overload initialize(args)
|
11
|
+
# @param [Hash] args the options to instantiate with
|
12
|
+
# @option args [String] :api_key the AT&T Speech API Key
|
13
|
+
# @option args [String] :secret_key the AT&T Speech API Secret Key
|
14
|
+
# @option args [String] :scope the Authorization Scope for the AT&T Speech API
|
15
|
+
# @option args [String] :base_url the url for the AT&T Speech API, default is 'https://api.att.com'
|
16
|
+
# @option args [Boolean] :ssl_verify determines if the peer Cert is verified for SSL, default is true
|
17
|
+
# @overload initialize(api_key, secret_key, scope, base_url='https://api.att.com', ssl_verify=true)
|
18
|
+
# @param [String] api_key the AT&T Speech API Key
|
19
|
+
# @param [String] secret_key the AT&T Speech API Secret Key
|
20
|
+
# @param [String] scope the Authorization Scope for the AT&T Speech API
|
21
|
+
# @param [String] base_url the url for the AT&T Speech API, default is 'https://api.att.com'
|
22
|
+
# @param [Boolean] ssl_verify determines if the peer Cert is verified for SSL, default is true
|
23
|
+
#
|
24
|
+
# @return [Object] an instance of ATTSpeech
|
25
|
+
def initialize(*args)
|
26
|
+
raise ArgumentError, "Requires at least the api_key, secret_key, and scope when instatiating" if args.size == 0
|
27
|
+
|
28
|
+
base_url = 'https://api.att.com'
|
29
|
+
|
30
|
+
if args.size == 1 && args[0].instance_of?(Hash)
|
31
|
+
@api_key = args[0][:api_key]
|
32
|
+
@secret_key = args[0][:secret_key]
|
33
|
+
@scope = args[0][:scope]
|
34
|
+
@base_url = args[0][:base_url] || base_url
|
35
|
+
set_ssl_verify args[0][:ssl_verify]
|
36
|
+
else
|
37
|
+
@api_key = args[0]
|
38
|
+
@secret_key = args[1]
|
39
|
+
@scope = args[2]
|
40
|
+
@base_url = args[3] || base_url
|
41
|
+
set_ssl_verify args[4]
|
42
|
+
end
|
43
|
+
|
44
|
+
raise ArgumentError, "scope must be either 'SPEECH' or 'TTS'" unless (@scope == 'SPEECH') || (@scope == 'TTS')
|
45
|
+
|
46
|
+
@grant_type = 'client_credentials'
|
47
|
+
@access_token = ''
|
48
|
+
@refresh_token = ''
|
49
|
+
|
50
|
+
if @scope == 'SPEECH'
|
51
|
+
create_connection 'application/json'
|
52
|
+
else
|
53
|
+
create_connection 'audio/x-wav'
|
54
|
+
end
|
55
|
+
|
56
|
+
get_tokens
|
57
|
+
|
58
|
+
Actor.current
|
59
|
+
end
|
60
|
+
|
61
|
+
##
|
62
|
+
# Allows you to send a file and return the speech to text result
|
63
|
+
# @param [String] file_contents to be processed
|
64
|
+
# @param [String] type of file to be processed, may be audio/wav, application/octet-stream or audio/amr
|
65
|
+
# @param [String] speech_context to use to evaluate the audio BusinessSearch, Gaming, Generic, QuestionAndAnswer, SMS, SocialMedia, TV, VoiceMail, WebSearch
|
66
|
+
# @param [Block] block to be called when the transcription completes
|
67
|
+
#
|
68
|
+
# @return [Hash] the resulting response from the AT&T Speech API
|
69
|
+
def speech_to_text(file_contents, type='audio/wav', speech_context='Generic', &block)
|
70
|
+
resource = "/speech/v3/speechToText"
|
71
|
+
|
72
|
+
if type == "application/octet-stream"
|
73
|
+
type = "audio/amr"
|
74
|
+
end
|
75
|
+
|
76
|
+
begin
|
77
|
+
response = @connection.post( resource,
|
78
|
+
file_contents,
|
79
|
+
:Authorization => "Bearer #{@access_token}",
|
80
|
+
:Content_Transfer_Encoding => 'chunked',
|
81
|
+
:X_SpeechContext => speech_context,
|
82
|
+
:Content_Type => type,
|
83
|
+
:Accept => 'application/json' )
|
84
|
+
|
85
|
+
result = process_response(response)
|
86
|
+
block.call result if block_given?
|
87
|
+
result
|
88
|
+
rescue => e
|
89
|
+
raise RuntimeError, e.to_s
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
##
|
95
|
+
# Allows you to send a string or plain text file and return the text to speech result
|
96
|
+
# @param [String] string or file_contents to be processed
|
97
|
+
# @param [String] type of file or object to be processed, may be text/plain, or application/ssml+xml
|
98
|
+
#
|
99
|
+
# @return [String] the bytes of the resulting response from the AT&T Speech API
|
100
|
+
def text_to_speech(file_contents, type='text/plain')
|
101
|
+
resource = "/speech/v3/textToSpeech"
|
102
|
+
|
103
|
+
begin
|
104
|
+
response = @connection.post( resource,
|
105
|
+
file_contents,
|
106
|
+
:Authorization => "Bearer #{@access_token}",
|
107
|
+
:Content_Type => type,
|
108
|
+
:Accept => 'audio/x-wav' )
|
109
|
+
|
110
|
+
response.body
|
111
|
+
rescue => e
|
112
|
+
raise RuntimeError, e.to_s
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
private
|
118
|
+
|
119
|
+
##
|
120
|
+
# Creates the Faraday connection object
|
121
|
+
def create_connection(accept_type='application/json')
|
122
|
+
@connection = Faraday.new(:url => @base_url, :ssl => { :verify => @ssl_verify }) do |faraday|
|
123
|
+
faraday.headers['Accept'] = accept_type
|
124
|
+
faraday.adapter Faraday.default_adapter
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
##
|
129
|
+
# Obtains the session tokens
|
130
|
+
def get_tokens
|
131
|
+
resource = "/oauth/access_token"
|
132
|
+
|
133
|
+
begin
|
134
|
+
response = @connection.post resource do |request|
|
135
|
+
request.params['client_id'] = @api_key
|
136
|
+
request.params['client_secret'] = @secret_key
|
137
|
+
request.params['grant_type'] = @grant_type
|
138
|
+
request.params['scope'] = @scope
|
139
|
+
end
|
140
|
+
|
141
|
+
result = process_response(response)
|
142
|
+
|
143
|
+
if result[:access_token].nil? || result[:refresh_token].nil?
|
144
|
+
raise RuntimeError, "Unable to complete oauth: #{response[:error]}"
|
145
|
+
else
|
146
|
+
@access_token = result[:access_token]
|
147
|
+
@refresh_token = result[:refresh_token]
|
148
|
+
end
|
149
|
+
rescue => e
|
150
|
+
raise RuntimeError, e.to_s
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
##
|
155
|
+
# Process the JSON returned into a Hashie::Mash and making it more Ruby friendly
|
156
|
+
#
|
157
|
+
# @param [String] response json
|
158
|
+
#
|
159
|
+
# @return [Object] a Hashie::Mash object
|
160
|
+
def process_response(response)
|
161
|
+
Hashie::Mash.new(underscore_hash(JSON.parse(response.body)))
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# Sets the ssl_verify option
|
166
|
+
#
|
167
|
+
# @param [Boolean] ssl_verify the variable to set
|
168
|
+
def set_ssl_verify(ssl_verify)
|
169
|
+
if ssl_verify == false
|
170
|
+
@ssl_verify = false
|
171
|
+
else
|
172
|
+
@ssl_verify = true
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
##
|
177
|
+
# Decamelizes the keys in a hash to be more Ruby friendly
|
178
|
+
#
|
179
|
+
# @param [Hash] hash to be decamelized
|
180
|
+
#
|
181
|
+
# @return [Hash] the hash with the keys decamelized
|
182
|
+
def underscore_hash(hash)
|
183
|
+
hash.inject({}) do |underscored, (key, value)|
|
184
|
+
value = underscore_hash(value) if value.is_a?(Hash)
|
185
|
+
if value.is_a?(Array)
|
186
|
+
value = underscore_hash(value[0]) if value[0].is_a?(Hash)
|
187
|
+
end
|
188
|
+
underscored[key.underscore] = value
|
189
|
+
underscored
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
data/lib/att_speech/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
class ATTSpeech
|
2
|
-
|
3
|
-
end
|
2
|
+
VERSION = "0.0.4"
|
3
|
+
end
|
data/spec/att_speech_spec.rb
CHANGED
@@ -2,106 +2,125 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
|
3
3
|
describe "AttSpeech" do
|
4
4
|
FakeWeb.allow_net_connect = false
|
5
|
-
|
6
|
-
FakeWeb.register_uri(:post,
|
7
|
-
"https://api.att.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
8
|
-
:status => ['200', 'OK'],
|
5
|
+
|
6
|
+
FakeWeb.register_uri(:post,
|
7
|
+
"https://api.att.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
8
|
+
:status => ['200', 'OK'],
|
9
9
|
:body => '{"access_token":"5678","refresh_token":"wxyz"}')
|
10
|
-
|
11
|
-
FakeWeb.register_uri(:post,
|
12
|
-
"http://foobar.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
13
|
-
:status => ['200', 'OK'],
|
10
|
+
|
11
|
+
FakeWeb.register_uri(:post,
|
12
|
+
"http://foobar.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
13
|
+
:status => ['200', 'OK'],
|
14
14
|
:body => '{"access_token":"5678","refresh_token":"wxyz"}')
|
15
|
-
|
16
|
-
FakeWeb.register_uri(:post,
|
17
|
-
"https://api.att.com/
|
18
|
-
:status => ['200', 'OK'],
|
15
|
+
|
16
|
+
FakeWeb.register_uri(:post,
|
17
|
+
"https://api.att.com/speech/v3/speechToText",
|
18
|
+
:status => ['200', 'OK'],
|
19
19
|
:body => "{\"Recognition\":{\"ResponseId\":\"2b0bdcf4301f5c4aba57e2765b59bcbe\",\"NBest\":[{\"WordScores\":[1,1],\"Confidence\":1,\"Grade\":\"accept\",\"ResultText\":\"Boston celtics.\",\"Words\":[\"Boston\",\"celtics.\"],\"LanguageId\":\"en-us\",\"Hypothesis\":\"Boston celtics.\"}]}}")
|
20
|
-
|
21
|
-
let(:att_speech) { att_speech = ATTSpeech.new
|
20
|
+
|
21
|
+
let(:att_speech) { att_speech = ATTSpeech.new('1234', 'abcd', 'SPEECH') }
|
22
22
|
let(:att_speech_hash) { att_speech = ATTSpeech.new({ :api_key => '1234',
|
23
|
-
:secret_key => 'abcd'
|
24
|
-
|
23
|
+
:secret_key => 'abcd',
|
24
|
+
:scope => 'SPEECH' })}
|
25
|
+
|
26
|
+
|
25
27
|
describe 'initializing' do
|
26
28
|
it "should raise an error of no parameters passed when creating object" do
|
27
29
|
begin
|
28
30
|
ATTSpeech.new
|
29
31
|
rescue => e
|
30
|
-
e.to_s.should eql "
|
32
|
+
e.to_s.should eql "Requires at least the api_key, secret_key, and scope when instatiating"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
it "shoud raise an error of wrong scope when creating object without scope" do
|
37
|
+
begin
|
38
|
+
ATTSpeech.new('1234', 'abcd')
|
39
|
+
rescue => e
|
40
|
+
e.to_s.should eql "scope must be either 'SPEECH' or 'TTS'"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it "shoud raise an error of wrong scope when creating object with misspelled scope" do
|
45
|
+
begin
|
46
|
+
ATTSpeech.new('1234', 'abcd', 'misspelled scope')
|
47
|
+
rescue => e
|
48
|
+
e.to_s.should eql "scope must be either 'SPEECH' or 'TTS'"
|
31
49
|
end
|
32
50
|
end
|
33
|
-
|
51
|
+
|
34
52
|
it "should create an ATTSpeech object" do
|
35
53
|
att_speech.class.should eql ATTSpeech
|
36
54
|
att_speech_hash.class.should eql ATTSpeech
|
37
55
|
end
|
38
|
-
|
56
|
+
|
39
57
|
it 'should set the url to something different' do
|
40
|
-
as = ATTSpeech.new('1234', 'abcd', 'http://foobar.com', false)
|
58
|
+
as = ATTSpeech.new('1234', 'abcd', 'SPEECH', 'http://foobar.com', false)
|
41
59
|
as.base_url.should == 'http://foobar.com'
|
42
60
|
as.ssl_verify.should == false
|
43
|
-
|
61
|
+
|
44
62
|
as = ATTSpeech.new({ :api_key => '1234',
|
45
63
|
:secret_key => 'abcd',
|
64
|
+
:scope => 'SPEECH',
|
46
65
|
:base_url => 'http://foobar.com',
|
47
66
|
:ssl_verify => false })
|
48
67
|
as.base_url.should == 'http://foobar.com'
|
49
68
|
as.ssl_verify.should == false
|
50
69
|
end
|
51
|
-
|
70
|
+
|
52
71
|
it "should set the access_token and refresh_token" do
|
53
72
|
att_speech.access_token.should eql '5678'
|
54
73
|
att_speech.refresh_token.should eql 'wxyz'
|
55
74
|
att_speech.base_url.should == 'https://api.att.com'
|
56
75
|
att_speech.ssl_verify.should == true
|
57
|
-
|
76
|
+
|
58
77
|
att_speech_hash.access_token.should eql '5678'
|
59
78
|
att_speech_hash.refresh_token.should eql 'wxyz'
|
60
79
|
att_speech_hash.base_url.should == 'https://api.att.com'
|
61
80
|
att_speech_hash.ssl_verify.should == true
|
62
81
|
end
|
63
82
|
end
|
64
|
-
|
83
|
+
|
65
84
|
describe 'blocking call' do
|
66
85
|
it "should return a Hashie::Mash object when processing an audio file" do
|
67
86
|
result = att_speech.speech_to_text 'spec/spec_helper.rb'
|
68
87
|
result.instance_of?(Hashie::Mash).should eql true
|
69
|
-
|
88
|
+
|
70
89
|
result = att_speech_hash.speech_to_text 'spec/spec_helper.rb'
|
71
90
|
result.instance_of?(Hashie::Mash).should eql true
|
72
91
|
end
|
73
|
-
|
92
|
+
|
74
93
|
it "should attempt to process an audio file" do
|
75
94
|
result = att_speech.speech_to_text 'spec/spec_helper.rb'
|
76
95
|
result[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
77
96
|
result[:recognition][:n_best][:confidence].should eql 1
|
78
|
-
|
97
|
+
|
79
98
|
result = att_speech_hash.speech_to_text 'spec/spec_helper.rb'
|
80
99
|
result[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
81
100
|
result[:recognition][:n_best][:confidence].should eql 1
|
82
101
|
end
|
83
102
|
end
|
84
|
-
|
103
|
+
|
85
104
|
describe 'non-blocking call' do
|
86
105
|
it "should return a Celluloid::Future object when processing an audio file" do
|
87
106
|
future = att_speech.future(:speech_to_text, 'spec/spec_helper.rb')
|
88
107
|
future.instance_of?(Celluloid::Future).should eql true
|
89
|
-
|
108
|
+
|
90
109
|
future = att_speech_hash.future(:speech_to_text, 'spec/spec_helper.rb')
|
91
110
|
future.instance_of?(Celluloid::Future).should eql true
|
92
111
|
end
|
93
|
-
|
112
|
+
|
94
113
|
it "should allow us to use a future to process an audio file" do
|
95
114
|
future = att_speech.future(:speech_to_text, 'spec/spec_helper.rb')
|
96
115
|
future.value[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
97
116
|
future.value[:recognition][:n_best][:confidence].should eql 1
|
98
|
-
|
117
|
+
|
99
118
|
future = att_speech_hash.future(:speech_to_text, 'spec/spec_helper.rb')
|
100
119
|
future.value[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
101
120
|
future.value[:recognition][:n_best][:confidence].should eql 1
|
102
121
|
end
|
103
122
|
end
|
104
|
-
|
123
|
+
|
105
124
|
describe 'non-blocking call with a block' do
|
106
125
|
it "should allow us to use a future to process an audio file and pass a block" do
|
107
126
|
result = nil
|
@@ -109,7 +128,7 @@ describe "AttSpeech" do
|
|
109
128
|
sleep 0.5
|
110
129
|
result[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
111
130
|
result[:recognition][:n_best][:confidence].should eql 1
|
112
|
-
|
131
|
+
|
113
132
|
result = nil
|
114
133
|
att_speech_hash.speech_to_text!('spec/spec_helper.rb') { |transcription| result = transcription }
|
115
134
|
sleep 0.5
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: att_speech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
|
-
- Jason Goecke
|
8
|
+
- Jason Goecke, Peter Wilson
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-05-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|
@@ -32,6 +32,9 @@ dependencies:
|
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
35
|
+
- - <
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.14.0
|
35
38
|
- - ! '>='
|
36
39
|
- !ruby/object:Gem::Version
|
37
40
|
version: 0.11.1
|
@@ -40,6 +43,9 @@ dependencies:
|
|
40
43
|
version_requirements: !ruby/object:Gem::Requirement
|
41
44
|
none: false
|
42
45
|
requirements:
|
46
|
+
- - <
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.14.0
|
43
49
|
- - ! '>='
|
44
50
|
- !ruby/object:Gem::Version
|
45
51
|
version: 0.11.1
|
@@ -187,7 +193,11 @@ dependencies:
|
|
187
193
|
- - ! '>='
|
188
194
|
- !ruby/object:Gem::Version
|
189
195
|
version: '0'
|
190
|
-
description: A Ruby library for consuming the AT&T Speech API for speech
|
196
|
+
description: A Ruby library for consuming v3 of the AT&T Speech API for speech->text,
|
197
|
+
and text->speech. Takes in either .wav or specific other audio files, and returns
|
198
|
+
a text string of the spoken words. Can also take in either a text string or .txt
|
199
|
+
file and returns a string of bytes from which a .wav file can be created of the
|
200
|
+
spoken text.
|
191
201
|
email: jason@goecke.net
|
192
202
|
executables: []
|
193
203
|
extensions: []
|
@@ -197,16 +207,22 @@ extra_rdoc_files:
|
|
197
207
|
files:
|
198
208
|
- .document
|
199
209
|
- .rspec
|
210
|
+
- .travis.yml
|
200
211
|
- Gemfile
|
212
|
+
- Gemfile.lock
|
201
213
|
- LICENSE.txt
|
214
|
+
- README.md
|
202
215
|
- Rakefile
|
203
216
|
- VERSION
|
217
|
+
- att_speech.gemspec
|
218
|
+
- examples/bostonSeltics.wav
|
219
|
+
- examples/example.rb
|
220
|
+
- examples/helloWorld.txt
|
204
221
|
- lib/att_speech.rb
|
205
|
-
- lib/att_speech/version.rb
|
206
222
|
- lib/att_speech/att_speech.rb
|
223
|
+
- lib/att_speech/version.rb
|
207
224
|
- spec/att_speech_spec.rb
|
208
225
|
- spec/spec_helper.rb
|
209
|
-
- README.md
|
210
226
|
homepage: http://github.com/jsgoecke/att_speech
|
211
227
|
licenses:
|
212
228
|
- MIT
|
@@ -228,10 +244,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
228
244
|
version: '0'
|
229
245
|
requirements: []
|
230
246
|
rubyforge_project:
|
231
|
-
rubygems_version: 1.8.
|
247
|
+
rubygems_version: 1.8.25
|
232
248
|
signing_key:
|
233
249
|
specification_version: 3
|
234
|
-
summary: A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/
|
235
|
-
for speech
|
250
|
+
summary: A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/forward.jsp?passedItemId=12500023
|
251
|
+
for speech->text, and text->speech.
|
236
252
|
test_files: []
|
237
|
-
has_rdoc:
|