att_speech 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +5 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +58 -0
- data/README.md +32 -7
- data/Rakefile +3 -3
- data/VERSION +1 -1
- data/att_speech.gemspec +87 -0
- data/examples/bostonSeltics.wav +0 -0
- data/examples/example.rb +53 -0
- data/examples/helloWorld.txt +1 -0
- data/lib/att_speech/att_speech.rb +191 -156
- data/lib/att_speech/version.rb +2 -2
- data/spec/att_speech_spec.rb +52 -33
- metadata +25 -10
data/.travis.yml
ADDED
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (3.2.13)
|
5
|
+
i18n (= 0.6.1)
|
6
|
+
multi_json (~> 1.0)
|
7
|
+
celluloid (0.13.0)
|
8
|
+
timers (>= 1.0.0)
|
9
|
+
diff-lcs (1.2.4)
|
10
|
+
fakeweb (1.3.0)
|
11
|
+
faraday (0.8.7)
|
12
|
+
multipart-post (~> 1.1)
|
13
|
+
git (1.2.5)
|
14
|
+
hashie (2.0.4)
|
15
|
+
i18n (0.6.1)
|
16
|
+
jeweler (1.8.4)
|
17
|
+
bundler (~> 1.0)
|
18
|
+
git (>= 1.2.5)
|
19
|
+
rake
|
20
|
+
rdoc
|
21
|
+
json (1.7.7)
|
22
|
+
json (1.7.7-java)
|
23
|
+
multi_json (1.7.3)
|
24
|
+
multipart-post (1.2.0)
|
25
|
+
rake (10.0.4)
|
26
|
+
rdoc (4.0.1)
|
27
|
+
json (~> 1.4)
|
28
|
+
rspec (2.13.0)
|
29
|
+
rspec-core (~> 2.13.0)
|
30
|
+
rspec-expectations (~> 2.13.0)
|
31
|
+
rspec-mocks (~> 2.13.0)
|
32
|
+
rspec-core (2.13.1)
|
33
|
+
rspec-expectations (2.13.0)
|
34
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
35
|
+
rspec-mocks (2.13.1)
|
36
|
+
simplecov (0.7.1)
|
37
|
+
multi_json (~> 1.0)
|
38
|
+
simplecov-html (~> 0.7.1)
|
39
|
+
simplecov-html (0.7.1)
|
40
|
+
timers (1.1.0)
|
41
|
+
yard (0.8.6.1)
|
42
|
+
|
43
|
+
PLATFORMS
|
44
|
+
java
|
45
|
+
ruby
|
46
|
+
|
47
|
+
DEPENDENCIES
|
48
|
+
activesupport
|
49
|
+
bundler (>= 1.0.0)
|
50
|
+
celluloid (>= 0.11.1, < 0.14.0)
|
51
|
+
fakeweb
|
52
|
+
faraday (>= 0.8.1)
|
53
|
+
hashie (>= 1.2.0)
|
54
|
+
jeweler (>= 1.8.4)
|
55
|
+
rdoc (>= 3.12)
|
56
|
+
rspec (>= 2.8.0)
|
57
|
+
simplecov
|
58
|
+
yard (>= 0.7)
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|

|
4
4
|
|
5
|
-
A Ruby library for consuming the AT&T [Speech API](https://developer.att.com/developer/apiDetailPage.jsp?passedItemId=10700023) for speech to text. API details may be found [here](
|
5
|
+
A Ruby library for consuming the AT&T [Speech API](https://developer.att.com/developer/apiDetailPage.jsp?passedItemId=10700023) for speech to text. API details may be found [here](https://developer.att.com/developer/basicTemplate.jsp?passedItemId=13100102&api=Speech&version=3).
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -15,8 +15,9 @@ gem install att_speech
|
|
15
15
|
```ruby
|
16
16
|
require 'att_speech'
|
17
17
|
|
18
|
-
att_speech = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
19
|
-
:secret_key => ENV['ATT_SPEECH_SECRET']
|
18
|
+
att_speech = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
19
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
20
|
+
:scope => 'SPEECH' })
|
20
21
|
|
21
22
|
# Read the audio file contents
|
22
23
|
file_contents = File.read(File.expand_path(File.dirname(File.dirname(__FILE__))) + "/bostonSeltics.wav")
|
@@ -30,15 +31,39 @@ future = att_speech.future(:speech_to_text, file_contents, type='audio/wav')
|
|
30
31
|
p future.value
|
31
32
|
|
32
33
|
# Non-blocking operation that will call a block when the transcription is returned
|
33
|
-
# Note: Remember, this is a concurrent operation so don't pass self and avoid mutable objects in the block
|
34
|
-
# from the calling context, better to have discreet actions contained in the block, such as inserting in a
|
34
|
+
# Note: Remember, this is a concurrent operation so don't pass self and avoid mutable objects in the block
|
35
|
+
# from the calling context, better to have discrete actions contained in the block, such as inserting in a
|
35
36
|
# datastore
|
36
37
|
sleep 2
|
37
38
|
att_speech.speech_to_text!(file_contents) { |transcription| p transcription }
|
38
39
|
sleep 5
|
40
|
+
|
41
|
+
|
42
|
+
def write_wav_file(audio_bytes)
|
43
|
+
file_name = "ret_audio-#{Time.now.strftime('%Y%m%d-%H%M%S')}.wav"
|
44
|
+
full_file_name = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', file_name))
|
45
|
+
audio_file = File.open(full_file_name, "w")
|
46
|
+
audio_file << audio_bytes
|
47
|
+
audio_file.close
|
48
|
+
end
|
49
|
+
|
50
|
+
att_text = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
51
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
52
|
+
:scope => 'TTS' })
|
53
|
+
|
54
|
+
# Read the text file contents
|
55
|
+
tfp = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', 'helloWorld.txt'))
|
56
|
+
txt_contents = File.read(tfp)
|
57
|
+
|
58
|
+
audio = att_text.text_to_speech(txt_contents)
|
59
|
+
write_wav_file(audio)
|
60
|
+
|
61
|
+
# Non-blocking operation with a future, if you have a longer file that requires more processing time
|
62
|
+
sleep 2
|
63
|
+
future = att_text.future(:text_to_speech, "This is a hello world.", type='text/plain')
|
64
|
+
write_wav_file(future.value)
|
39
65
|
```
|
40
66
|
|
41
67
|
## Copyright
|
42
68
|
|
43
|
-
Copyright (c)
|
44
|
-
|
69
|
+
Copyright (c) 2013 Jason Goecke. See LICENSE.txt for further details.
|
data/Rakefile
CHANGED
@@ -17,10 +17,10 @@ Jeweler::Tasks.new do |gem|
|
|
17
17
|
gem.name = "att_speech"
|
18
18
|
gem.homepage = "http://github.com/jsgoecke/att_speech"
|
19
19
|
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{
|
21
|
-
gem.description = %Q{
|
20
|
+
gem.summary = %Q{A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/forward.jsp?passedItemId=12500023 for speech->text, and text->speech.}
|
21
|
+
gem.description = %Q{A Ruby library for consuming v3 of the AT&T Speech API for speech->text, and text->speech. Takes in either .wav or specific other audio files, and returns a text string of the spoken words. Can also take in either a text string or .txt file and returns a string of bytes from which a .wav file can be created of the spoken text.}
|
22
22
|
gem.email = "jason@goecke.net"
|
23
|
-
gem.authors = ["Jason Goecke"]
|
23
|
+
gem.authors = ["Jason Goecke, Peter Wilson"]
|
24
24
|
# dependencies defined in Gemfile
|
25
25
|
end
|
26
26
|
Jeweler::RubygemsDotOrgTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.4
|
data/att_speech.gemspec
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "att_speech"
|
8
|
+
s.version = "0.0.4"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Jason Goecke, Peter Wilson"]
|
12
|
+
s.date = "2013-05-09"
|
13
|
+
s.description = "A Ruby library for consuming v3 of the AT&T Speech API for speech->text, and text->speech. Takes in either .wav or specific other audio files, and returns a text string of the spoken words. Can also take in either a text string or .txt file and returns a string of bytes from which a .wav file can be created of the spoken text."
|
14
|
+
s.email = "jason@goecke.net"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.md"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
".travis.yml",
|
23
|
+
"Gemfile",
|
24
|
+
"Gemfile.lock",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.md",
|
27
|
+
"Rakefile",
|
28
|
+
"VERSION",
|
29
|
+
"att_speech.gemspec",
|
30
|
+
"examples/bostonSeltics.wav",
|
31
|
+
"examples/example.rb",
|
32
|
+
"examples/helloWorld.txt",
|
33
|
+
"lib/att_speech.rb",
|
34
|
+
"lib/att_speech/att_speech.rb",
|
35
|
+
"lib/att_speech/version.rb",
|
36
|
+
"spec/att_speech_spec.rb",
|
37
|
+
"spec/spec_helper.rb"
|
38
|
+
]
|
39
|
+
s.homepage = "http://github.com/jsgoecke/att_speech"
|
40
|
+
s.licenses = ["MIT"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = "1.8.24"
|
43
|
+
s.summary = "A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/forward.jsp?passedItemId=12500023 for speech->text, and text->speech."
|
44
|
+
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<faraday>, [">= 0.8.1"])
|
50
|
+
s.add_runtime_dependency(%q<celluloid>, ["< 0.14.0", ">= 0.11.1"])
|
51
|
+
s.add_runtime_dependency(%q<hashie>, [">= 1.2.0"])
|
52
|
+
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
53
|
+
s.add_development_dependency(%q<rspec>, [">= 2.8.0"])
|
54
|
+
s.add_development_dependency(%q<yard>, [">= 0.7"])
|
55
|
+
s.add_development_dependency(%q<rdoc>, [">= 3.12"])
|
56
|
+
s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
|
57
|
+
s.add_development_dependency(%q<jeweler>, [">= 1.8.4"])
|
58
|
+
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
59
|
+
s.add_development_dependency(%q<fakeweb>, [">= 0"])
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<faraday>, [">= 0.8.1"])
|
62
|
+
s.add_dependency(%q<celluloid>, ["< 0.14.0", ">= 0.11.1"])
|
63
|
+
s.add_dependency(%q<hashie>, [">= 1.2.0"])
|
64
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
65
|
+
s.add_dependency(%q<rspec>, [">= 2.8.0"])
|
66
|
+
s.add_dependency(%q<yard>, [">= 0.7"])
|
67
|
+
s.add_dependency(%q<rdoc>, [">= 3.12"])
|
68
|
+
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
69
|
+
s.add_dependency(%q<jeweler>, [">= 1.8.4"])
|
70
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
71
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
72
|
+
end
|
73
|
+
else
|
74
|
+
s.add_dependency(%q<faraday>, [">= 0.8.1"])
|
75
|
+
s.add_dependency(%q<celluloid>, ["< 0.14.0", ">= 0.11.1"])
|
76
|
+
s.add_dependency(%q<hashie>, [">= 1.2.0"])
|
77
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
78
|
+
s.add_dependency(%q<rspec>, [">= 2.8.0"])
|
79
|
+
s.add_dependency(%q<yard>, [">= 0.7"])
|
80
|
+
s.add_dependency(%q<rdoc>, [">= 3.12"])
|
81
|
+
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
82
|
+
s.add_dependency(%q<jeweler>, [">= 1.8.4"])
|
83
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
84
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
Binary file
|
data/examples/example.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
$LOAD_PATH << './lib'
|
2
|
+
|
3
|
+
lp = File.join(File.dirname(File.dirname(__FILE__)), 'lib', 'att_speech.rb')
|
4
|
+
require lp
|
5
|
+
|
6
|
+
att_speech = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
7
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
8
|
+
:scope => 'SPEECH' })
|
9
|
+
|
10
|
+
# Read the audio file contents
|
11
|
+
fp = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', 'bostonSeltics.wav'))
|
12
|
+
file_contents = File.read(fp)
|
13
|
+
|
14
|
+
# Blocking operation
|
15
|
+
p att_speech.speech_to_text(file_contents, type='audio/wav')
|
16
|
+
|
17
|
+
# Non-blocking operation with a future, if you have a longer file that requires more processing time
|
18
|
+
sleep 2
|
19
|
+
future = att_speech.future(:speech_to_text, file_contents, type='audio/wav')
|
20
|
+
p future.value
|
21
|
+
|
22
|
+
# Non-blocking operation that will call a block when the transcription is returned
|
23
|
+
# Note: Remember, this is a concurrent operation so don't pass self and avoid mutable objects in the block
|
24
|
+
# from the calling context, better to have discrete actions contained in the block, such as inserting in a
|
25
|
+
# datastore
|
26
|
+
sleep 2
|
27
|
+
att_speech.speech_to_text!(file_contents) { |transcription| p transcription }
|
28
|
+
sleep 5
|
29
|
+
|
30
|
+
|
31
|
+
def write_wav_file(audio_bytes)
|
32
|
+
file_name = "ret_audio-#{Time.now.strftime('%Y%m%d-%H%M%S')}.wav"
|
33
|
+
full_file_name = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', file_name))
|
34
|
+
audio_file = File.open(full_file_name, "w")
|
35
|
+
audio_file << audio_bytes
|
36
|
+
audio_file.close
|
37
|
+
end
|
38
|
+
|
39
|
+
att_text = ATTSpeech.new({ :api_key => ENV['ATT_SPEECH_KEY'],
|
40
|
+
:secret_key => ENV['ATT_SPEECH_SECRET'],
|
41
|
+
:scope => 'TTS' })
|
42
|
+
|
43
|
+
# Read the text file contents
|
44
|
+
tfp = File.expand_path(File.join(File.dirname(File.dirname(__FILE__)), 'examples', 'helloWorld.txt'))
|
45
|
+
txt_contents = File.read(tfp)
|
46
|
+
|
47
|
+
audio = att_text.text_to_speech(txt_contents)
|
48
|
+
write_wav_file(audio)
|
49
|
+
|
50
|
+
# Non-blocking operation with a future, if you have a longer file that requires more processing time
|
51
|
+
sleep 2
|
52
|
+
future = att_text.future(:text_to_speech, "This is a hello world.", type='text/plain')
|
53
|
+
write_wav_file(future.value)
|
@@ -0,0 +1 @@
|
|
1
|
+
Hello World!
|
@@ -1,157 +1,192 @@
|
|
1
1
|
class ATTSpeech
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
2
|
+
include Celluloid
|
3
|
+
Celluloid.logger = nil
|
4
|
+
|
5
|
+
attr_reader :api_key, :secret_key, :access_token, :refresh_token, :base_url, :ssl_verify, :scope
|
6
|
+
|
7
|
+
##
|
8
|
+
# Creates an ATTSpeech object
|
9
|
+
#
|
10
|
+
# @overload initialize(args)
|
11
|
+
# @param [Hash] args the options to instantiate with
|
12
|
+
# @option args [String] :api_key the AT&T Speech API Key
|
13
|
+
# @option args [String] :secret_key the AT&T Speech API Secret Key
|
14
|
+
# @option args [String] :scope the Authorization Scope for the AT&T Speech API
|
15
|
+
# @option args [String] :base_url the url for the AT&T Speech API, default is 'https://api.att.com'
|
16
|
+
# @option args [Boolean] :ssl_verify determines if the peer Cert is verified for SSL, default is true
|
17
|
+
# @overload initialize(api_key, secret_key, scope, base_url='https://api.att.com', ssl_verify=true)
|
18
|
+
# @param [String] api_key the AT&T Speech API Key
|
19
|
+
# @param [String] secret_key the AT&T Speech API Secret Key
|
20
|
+
# @param [String] scope the Authorization Scope for the AT&T Speech API
|
21
|
+
# @param [String] base_url the url for the AT&T Speech API, default is 'https://api.att.com'
|
22
|
+
# @param [Boolean] ssl_verify determines if the peer Cert is verified for SSL, default is true
|
23
|
+
#
|
24
|
+
# @return [Object] an instance of ATTSpeech
|
25
|
+
def initialize(*args)
|
26
|
+
raise ArgumentError, "Requires at least the api_key, secret_key, and scope when instatiating" if args.size == 0
|
27
|
+
|
28
|
+
base_url = 'https://api.att.com'
|
29
|
+
|
30
|
+
if args.size == 1 && args[0].instance_of?(Hash)
|
31
|
+
@api_key = args[0][:api_key]
|
32
|
+
@secret_key = args[0][:secret_key]
|
33
|
+
@scope = args[0][:scope]
|
34
|
+
@base_url = args[0][:base_url] || base_url
|
35
|
+
set_ssl_verify args[0][:ssl_verify]
|
36
|
+
else
|
37
|
+
@api_key = args[0]
|
38
|
+
@secret_key = args[1]
|
39
|
+
@scope = args[2]
|
40
|
+
@base_url = args[3] || base_url
|
41
|
+
set_ssl_verify args[4]
|
42
|
+
end
|
43
|
+
|
44
|
+
raise ArgumentError, "scope must be either 'SPEECH' or 'TTS'" unless (@scope == 'SPEECH') || (@scope == 'TTS')
|
45
|
+
|
46
|
+
@grant_type = 'client_credentials'
|
47
|
+
@access_token = ''
|
48
|
+
@refresh_token = ''
|
49
|
+
|
50
|
+
if @scope == 'SPEECH'
|
51
|
+
create_connection 'application/json'
|
52
|
+
else
|
53
|
+
create_connection 'audio/x-wav'
|
54
|
+
end
|
55
|
+
|
56
|
+
get_tokens
|
57
|
+
|
58
|
+
Actor.current
|
59
|
+
end
|
60
|
+
|
61
|
+
##
|
62
|
+
# Allows you to send a file and return the speech to text result
|
63
|
+
# @param [String] file_contents to be processed
|
64
|
+
# @param [String] type of file to be processed, may be audio/wav, application/octet-stream or audio/amr
|
65
|
+
# @param [String] speech_context to use to evaluate the audio BusinessSearch, Gaming, Generic, QuestionAndAnswer, SMS, SocialMedia, TV, VoiceMail, WebSearch
|
66
|
+
# @param [Block] block to be called when the transcription completes
|
67
|
+
#
|
68
|
+
# @return [Hash] the resulting response from the AT&T Speech API
|
69
|
+
def speech_to_text(file_contents, type='audio/wav', speech_context='Generic', &block)
|
70
|
+
resource = "/speech/v3/speechToText"
|
71
|
+
|
72
|
+
if type == "application/octet-stream"
|
73
|
+
type = "audio/amr"
|
74
|
+
end
|
75
|
+
|
76
|
+
begin
|
77
|
+
response = @connection.post( resource,
|
78
|
+
file_contents,
|
79
|
+
:Authorization => "Bearer #{@access_token}",
|
80
|
+
:Content_Transfer_Encoding => 'chunked',
|
81
|
+
:X_SpeechContext => speech_context,
|
82
|
+
:Content_Type => type,
|
83
|
+
:Accept => 'application/json' )
|
84
|
+
|
85
|
+
result = process_response(response)
|
86
|
+
block.call result if block_given?
|
87
|
+
result
|
88
|
+
rescue => e
|
89
|
+
raise RuntimeError, e.to_s
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
##
|
95
|
+
# Allows you to send a string or plain text file and return the text to speech result
|
96
|
+
# @param [String] string or file_contents to be processed
|
97
|
+
# @param [String] type of file or object to be processed, may be text/plain, or application/ssml+xml
|
98
|
+
#
|
99
|
+
# @return [String] the bytes of the resulting response from the AT&T Speech API
|
100
|
+
def text_to_speech(file_contents, type='text/plain')
|
101
|
+
resource = "/speech/v3/textToSpeech"
|
102
|
+
|
103
|
+
begin
|
104
|
+
response = @connection.post( resource,
|
105
|
+
file_contents,
|
106
|
+
:Authorization => "Bearer #{@access_token}",
|
107
|
+
:Content_Type => type,
|
108
|
+
:Accept => 'audio/x-wav' )
|
109
|
+
|
110
|
+
response.body
|
111
|
+
rescue => e
|
112
|
+
raise RuntimeError, e.to_s
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
private
|
118
|
+
|
119
|
+
##
|
120
|
+
# Creates the Faraday connection object
|
121
|
+
def create_connection(accept_type='application/json')
|
122
|
+
@connection = Faraday.new(:url => @base_url, :ssl => { :verify => @ssl_verify }) do |faraday|
|
123
|
+
faraday.headers['Accept'] = accept_type
|
124
|
+
faraday.adapter Faraday.default_adapter
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
##
|
129
|
+
# Obtains the session tokens
|
130
|
+
def get_tokens
|
131
|
+
resource = "/oauth/access_token"
|
132
|
+
|
133
|
+
begin
|
134
|
+
response = @connection.post resource do |request|
|
135
|
+
request.params['client_id'] = @api_key
|
136
|
+
request.params['client_secret'] = @secret_key
|
137
|
+
request.params['grant_type'] = @grant_type
|
138
|
+
request.params['scope'] = @scope
|
139
|
+
end
|
140
|
+
|
141
|
+
result = process_response(response)
|
142
|
+
|
143
|
+
if result[:access_token].nil? || result[:refresh_token].nil?
|
144
|
+
raise RuntimeError, "Unable to complete oauth: #{response[:error]}"
|
145
|
+
else
|
146
|
+
@access_token = result[:access_token]
|
147
|
+
@refresh_token = result[:refresh_token]
|
148
|
+
end
|
149
|
+
rescue => e
|
150
|
+
raise RuntimeError, e.to_s
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
##
|
155
|
+
# Process the JSON returned into a Hashie::Mash and making it more Ruby friendly
|
156
|
+
#
|
157
|
+
# @param [String] response json
|
158
|
+
#
|
159
|
+
# @return [Object] a Hashie::Mash object
|
160
|
+
def process_response(response)
|
161
|
+
Hashie::Mash.new(underscore_hash(JSON.parse(response.body)))
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# Sets the ssl_verify option
|
166
|
+
#
|
167
|
+
# @param [Boolean] ssl_verify the variable to set
|
168
|
+
def set_ssl_verify(ssl_verify)
|
169
|
+
if ssl_verify == false
|
170
|
+
@ssl_verify = false
|
171
|
+
else
|
172
|
+
@ssl_verify = true
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
##
|
177
|
+
# Decamelizes the keys in a hash to be more Ruby friendly
|
178
|
+
#
|
179
|
+
# @param [Hash] hash to be decamelized
|
180
|
+
#
|
181
|
+
# @return [Hash] the hash with the keys decamelized
|
182
|
+
def underscore_hash(hash)
|
183
|
+
hash.inject({}) do |underscored, (key, value)|
|
184
|
+
value = underscore_hash(value) if value.is_a?(Hash)
|
185
|
+
if value.is_a?(Array)
|
186
|
+
value = underscore_hash(value[0]) if value[0].is_a?(Hash)
|
187
|
+
end
|
188
|
+
underscored[key.underscore] = value
|
189
|
+
underscored
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
data/lib/att_speech/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
class ATTSpeech
|
2
|
-
|
3
|
-
end
|
2
|
+
VERSION = "0.0.4"
|
3
|
+
end
|
data/spec/att_speech_spec.rb
CHANGED
@@ -2,106 +2,125 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
|
3
3
|
describe "AttSpeech" do
|
4
4
|
FakeWeb.allow_net_connect = false
|
5
|
-
|
6
|
-
FakeWeb.register_uri(:post,
|
7
|
-
"https://api.att.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
8
|
-
:status => ['200', 'OK'],
|
5
|
+
|
6
|
+
FakeWeb.register_uri(:post,
|
7
|
+
"https://api.att.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
8
|
+
:status => ['200', 'OK'],
|
9
9
|
:body => '{"access_token":"5678","refresh_token":"wxyz"}')
|
10
|
-
|
11
|
-
FakeWeb.register_uri(:post,
|
12
|
-
"http://foobar.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
13
|
-
:status => ['200', 'OK'],
|
10
|
+
|
11
|
+
FakeWeb.register_uri(:post,
|
12
|
+
"http://foobar.com/oauth/access_token?client_id=1234&client_secret=abcd&grant_type=client_credentials&scope=SPEECH",
|
13
|
+
:status => ['200', 'OK'],
|
14
14
|
:body => '{"access_token":"5678","refresh_token":"wxyz"}')
|
15
|
-
|
16
|
-
FakeWeb.register_uri(:post,
|
17
|
-
"https://api.att.com/
|
18
|
-
:status => ['200', 'OK'],
|
15
|
+
|
16
|
+
FakeWeb.register_uri(:post,
|
17
|
+
"https://api.att.com/speech/v3/speechToText",
|
18
|
+
:status => ['200', 'OK'],
|
19
19
|
:body => "{\"Recognition\":{\"ResponseId\":\"2b0bdcf4301f5c4aba57e2765b59bcbe\",\"NBest\":[{\"WordScores\":[1,1],\"Confidence\":1,\"Grade\":\"accept\",\"ResultText\":\"Boston celtics.\",\"Words\":[\"Boston\",\"celtics.\"],\"LanguageId\":\"en-us\",\"Hypothesis\":\"Boston celtics.\"}]}}")
|
20
|
-
|
21
|
-
let(:att_speech) { att_speech = ATTSpeech.new
|
20
|
+
|
21
|
+
let(:att_speech) { att_speech = ATTSpeech.new('1234', 'abcd', 'SPEECH') }
|
22
22
|
let(:att_speech_hash) { att_speech = ATTSpeech.new({ :api_key => '1234',
|
23
|
-
:secret_key => 'abcd'
|
24
|
-
|
23
|
+
:secret_key => 'abcd',
|
24
|
+
:scope => 'SPEECH' })}
|
25
|
+
|
26
|
+
|
25
27
|
describe 'initializing' do
|
26
28
|
it "should raise an error of no parameters passed when creating object" do
|
27
29
|
begin
|
28
30
|
ATTSpeech.new
|
29
31
|
rescue => e
|
30
|
-
e.to_s.should eql "
|
32
|
+
e.to_s.should eql "Requires at least the api_key, secret_key, and scope when instatiating"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
it "shoud raise an error of wrong scope when creating object without scope" do
|
37
|
+
begin
|
38
|
+
ATTSpeech.new('1234', 'abcd')
|
39
|
+
rescue => e
|
40
|
+
e.to_s.should eql "scope must be either 'SPEECH' or 'TTS'"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it "shoud raise an error of wrong scope when creating object with misspelled scope" do
|
45
|
+
begin
|
46
|
+
ATTSpeech.new('1234', 'abcd', 'misspelled scope')
|
47
|
+
rescue => e
|
48
|
+
e.to_s.should eql "scope must be either 'SPEECH' or 'TTS'"
|
31
49
|
end
|
32
50
|
end
|
33
|
-
|
51
|
+
|
34
52
|
it "should create an ATTSpeech object" do
|
35
53
|
att_speech.class.should eql ATTSpeech
|
36
54
|
att_speech_hash.class.should eql ATTSpeech
|
37
55
|
end
|
38
|
-
|
56
|
+
|
39
57
|
it 'should set the url to something different' do
|
40
|
-
as = ATTSpeech.new('1234', 'abcd', 'http://foobar.com', false)
|
58
|
+
as = ATTSpeech.new('1234', 'abcd', 'SPEECH', 'http://foobar.com', false)
|
41
59
|
as.base_url.should == 'http://foobar.com'
|
42
60
|
as.ssl_verify.should == false
|
43
|
-
|
61
|
+
|
44
62
|
as = ATTSpeech.new({ :api_key => '1234',
|
45
63
|
:secret_key => 'abcd',
|
64
|
+
:scope => 'SPEECH',
|
46
65
|
:base_url => 'http://foobar.com',
|
47
66
|
:ssl_verify => false })
|
48
67
|
as.base_url.should == 'http://foobar.com'
|
49
68
|
as.ssl_verify.should == false
|
50
69
|
end
|
51
|
-
|
70
|
+
|
52
71
|
it "should set the access_token and refresh_token" do
|
53
72
|
att_speech.access_token.should eql '5678'
|
54
73
|
att_speech.refresh_token.should eql 'wxyz'
|
55
74
|
att_speech.base_url.should == 'https://api.att.com'
|
56
75
|
att_speech.ssl_verify.should == true
|
57
|
-
|
76
|
+
|
58
77
|
att_speech_hash.access_token.should eql '5678'
|
59
78
|
att_speech_hash.refresh_token.should eql 'wxyz'
|
60
79
|
att_speech_hash.base_url.should == 'https://api.att.com'
|
61
80
|
att_speech_hash.ssl_verify.should == true
|
62
81
|
end
|
63
82
|
end
|
64
|
-
|
83
|
+
|
65
84
|
describe 'blocking call' do
|
66
85
|
it "should return a Hashie::Mash object when processing an audio file" do
|
67
86
|
result = att_speech.speech_to_text 'spec/spec_helper.rb'
|
68
87
|
result.instance_of?(Hashie::Mash).should eql true
|
69
|
-
|
88
|
+
|
70
89
|
result = att_speech_hash.speech_to_text 'spec/spec_helper.rb'
|
71
90
|
result.instance_of?(Hashie::Mash).should eql true
|
72
91
|
end
|
73
|
-
|
92
|
+
|
74
93
|
it "should attempt to process an audio file" do
|
75
94
|
result = att_speech.speech_to_text 'spec/spec_helper.rb'
|
76
95
|
result[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
77
96
|
result[:recognition][:n_best][:confidence].should eql 1
|
78
|
-
|
97
|
+
|
79
98
|
result = att_speech_hash.speech_to_text 'spec/spec_helper.rb'
|
80
99
|
result[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
81
100
|
result[:recognition][:n_best][:confidence].should eql 1
|
82
101
|
end
|
83
102
|
end
|
84
|
-
|
103
|
+
|
85
104
|
describe 'non-blocking call' do
|
86
105
|
it "should return a Celluloid::Future object when processing an audio file" do
|
87
106
|
future = att_speech.future(:speech_to_text, 'spec/spec_helper.rb')
|
88
107
|
future.instance_of?(Celluloid::Future).should eql true
|
89
|
-
|
108
|
+
|
90
109
|
future = att_speech_hash.future(:speech_to_text, 'spec/spec_helper.rb')
|
91
110
|
future.instance_of?(Celluloid::Future).should eql true
|
92
111
|
end
|
93
|
-
|
112
|
+
|
94
113
|
it "should allow us to use a future to process an audio file" do
|
95
114
|
future = att_speech.future(:speech_to_text, 'spec/spec_helper.rb')
|
96
115
|
future.value[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
97
116
|
future.value[:recognition][:n_best][:confidence].should eql 1
|
98
|
-
|
117
|
+
|
99
118
|
future = att_speech_hash.future(:speech_to_text, 'spec/spec_helper.rb')
|
100
119
|
future.value[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
101
120
|
future.value[:recognition][:n_best][:confidence].should eql 1
|
102
121
|
end
|
103
122
|
end
|
104
|
-
|
123
|
+
|
105
124
|
describe 'non-blocking call with a block' do
|
106
125
|
it "should allow us to use a future to process an audio file and pass a block" do
|
107
126
|
result = nil
|
@@ -109,7 +128,7 @@ describe "AttSpeech" do
|
|
109
128
|
sleep 0.5
|
110
129
|
result[:recognition][:response_id].should eql '2b0bdcf4301f5c4aba57e2765b59bcbe'
|
111
130
|
result[:recognition][:n_best][:confidence].should eql 1
|
112
|
-
|
131
|
+
|
113
132
|
result = nil
|
114
133
|
att_speech_hash.speech_to_text!('spec/spec_helper.rb') { |transcription| result = transcription }
|
115
134
|
sleep 0.5
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: att_speech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
|
-
- Jason Goecke
|
8
|
+
- Jason Goecke, Peter Wilson
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-05-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|
@@ -32,6 +32,9 @@ dependencies:
|
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
35
|
+
- - <
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.14.0
|
35
38
|
- - ! '>='
|
36
39
|
- !ruby/object:Gem::Version
|
37
40
|
version: 0.11.1
|
@@ -40,6 +43,9 @@ dependencies:
|
|
40
43
|
version_requirements: !ruby/object:Gem::Requirement
|
41
44
|
none: false
|
42
45
|
requirements:
|
46
|
+
- - <
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.14.0
|
43
49
|
- - ! '>='
|
44
50
|
- !ruby/object:Gem::Version
|
45
51
|
version: 0.11.1
|
@@ -187,7 +193,11 @@ dependencies:
|
|
187
193
|
- - ! '>='
|
188
194
|
- !ruby/object:Gem::Version
|
189
195
|
version: '0'
|
190
|
-
description: A Ruby library for consuming the AT&T Speech API for speech
|
196
|
+
description: A Ruby library for consuming v3 of the AT&T Speech API for speech->text,
|
197
|
+
and text->speech. Takes in either .wav or specific other audio files, and returns
|
198
|
+
a text string of the spoken words. Can also take in either a text string or .txt
|
199
|
+
file and returns a string of bytes from which a .wav file can be created of the
|
200
|
+
spoken text.
|
191
201
|
email: jason@goecke.net
|
192
202
|
executables: []
|
193
203
|
extensions: []
|
@@ -197,16 +207,22 @@ extra_rdoc_files:
|
|
197
207
|
files:
|
198
208
|
- .document
|
199
209
|
- .rspec
|
210
|
+
- .travis.yml
|
200
211
|
- Gemfile
|
212
|
+
- Gemfile.lock
|
201
213
|
- LICENSE.txt
|
214
|
+
- README.md
|
202
215
|
- Rakefile
|
203
216
|
- VERSION
|
217
|
+
- att_speech.gemspec
|
218
|
+
- examples/bostonSeltics.wav
|
219
|
+
- examples/example.rb
|
220
|
+
- examples/helloWorld.txt
|
204
221
|
- lib/att_speech.rb
|
205
|
-
- lib/att_speech/version.rb
|
206
222
|
- lib/att_speech/att_speech.rb
|
223
|
+
- lib/att_speech/version.rb
|
207
224
|
- spec/att_speech_spec.rb
|
208
225
|
- spec/spec_helper.rb
|
209
|
-
- README.md
|
210
226
|
homepage: http://github.com/jsgoecke/att_speech
|
211
227
|
licenses:
|
212
228
|
- MIT
|
@@ -228,10 +244,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
228
244
|
version: '0'
|
229
245
|
requirements: []
|
230
246
|
rubyforge_project:
|
231
|
-
rubygems_version: 1.8.
|
247
|
+
rubygems_version: 1.8.25
|
232
248
|
signing_key:
|
233
249
|
specification_version: 3
|
234
|
-
summary: A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/
|
235
|
-
for speech
|
250
|
+
summary: A Ruby library for consuming the AT&T Speech API https://developer.att.com/developer/forward.jsp?passedItemId=12500023
|
251
|
+
for speech->text, and text->speech.
|
236
252
|
test_files: []
|
237
|
-
has_rdoc:
|