google-cloud-speech 0.20.0 → 0.21.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google-cloud-speech.rb +6 -13
- data/lib/google/cloud/speech.rb +102 -24
- data/lib/google/cloud/speech/audio.rb +73 -11
- data/lib/google/cloud/speech/job.rb +10 -15
- data/lib/google/cloud/speech/project.rb +142 -40
- data/lib/google/cloud/speech/result.rb +124 -10
- data/lib/google/cloud/speech/service.rb +33 -14
- data/lib/google/cloud/speech/stream.rb +563 -0
- data/lib/google/cloud/speech/v1beta1.rb +2 -0
- data/lib/google/cloud/speech/v1beta1/speech_api.rb +1 -0
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83a0ff44f2fad9796bdd39f3493c05e858bb7165
|
4
|
+
data.tar.gz: f5f63a9a256c082864f7fcbcdfa7741e8f55b494
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a077cbf101500f553b7156a23b44e83086e6ef2c452edaffaac4cdedda39ef86712da8e58821f933f6b8076b08e2de5132696e57856a0f528d756816bddee2ee
|
7
|
+
data.tar.gz: cac8e868df8240737eaa8733190743afaf314353f6ad806f885f6d8fd55d5d4e19dda6de4df0325666a621a44216b551883224058ec7c45a2be2b0d0a36f2df6
|
data/lib/google-cloud-speech.rb
CHANGED
@@ -40,7 +40,7 @@ module Google
|
|
40
40
|
# * `https://www.googleapis.com/auth/speech`
|
41
41
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
42
42
|
# @param [Hash] client_config A hash of values to override the default
|
43
|
-
# behavior of the API client.
|
43
|
+
# behavior of the API client. Optional.
|
44
44
|
#
|
45
45
|
# @return [Google::Cloud::Speech::Project]
|
46
46
|
#
|
@@ -87,12 +87,12 @@ module Google
|
|
87
87
|
# * `https://www.googleapis.com/auth/speech`
|
88
88
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
89
89
|
# @param [Hash] client_config A hash of values to override the default
|
90
|
-
# behavior of the API client.
|
90
|
+
# behavior of the API client. Optional.
|
91
91
|
#
|
92
92
|
# @return [Google::Cloud::Speech::Project]
|
93
93
|
#
|
94
94
|
# @example
|
95
|
-
# require "google/cloud
|
95
|
+
# require "google/cloud"
|
96
96
|
#
|
97
97
|
# speech = Google::Cloud.speech
|
98
98
|
#
|
@@ -102,16 +102,9 @@ module Google
|
|
102
102
|
def self.speech project = nil, keyfile = nil, scope: nil, timeout: nil,
|
103
103
|
client_config: nil
|
104
104
|
require "google/cloud/speech"
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
else
|
109
|
-
credentials = Google::Cloud::Speech::Credentials.new(
|
110
|
-
keyfile, scope: scope)
|
111
|
-
end
|
112
|
-
Google::Cloud::Speech::Project.new(
|
113
|
-
Google::Cloud::Speech::Service.new(
|
114
|
-
project, credentials, timeout: timeout, client_config: client_config))
|
105
|
+
Google::Cloud::Speech.new project: project, keyfile: keyfile,
|
106
|
+
scope: scope, timeout: timeout,
|
107
|
+
client_config: client_config
|
115
108
|
end
|
116
109
|
end
|
117
110
|
end
|
data/lib/google/cloud/speech.rb
CHANGED
@@ -51,10 +51,9 @@ module Google
|
|
51
51
|
# API. You can provide a file path:
|
52
52
|
#
|
53
53
|
# ```ruby
|
54
|
-
# require "google/cloud"
|
54
|
+
# require "google/cloud/speech"
|
55
55
|
#
|
56
|
-
#
|
57
|
-
# speech = gcloud.speech
|
56
|
+
# speech = Google::Cloud::Speech.new
|
58
57
|
#
|
59
58
|
# audio = speech.audio "path/to/audio.raw",
|
60
59
|
# encoding: :raw, sample_rate: 16000
|
@@ -63,10 +62,9 @@ module Google
|
|
63
62
|
# Or, you can initialize the audio instance with a Google Cloud Storage URI:
|
64
63
|
#
|
65
64
|
# ```ruby
|
66
|
-
# require "google/cloud"
|
65
|
+
# require "google/cloud/speech"
|
67
66
|
#
|
68
|
-
#
|
69
|
-
# speech = gcloud.speech
|
67
|
+
# speech = Google::Cloud::Speech.new
|
70
68
|
#
|
71
69
|
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
72
70
|
# encoding: :raw, sample_rate: 16000
|
@@ -75,15 +73,16 @@ module Google
|
|
75
73
|
# Or, with a Google Cloud Storage File object:
|
76
74
|
#
|
77
75
|
# ```ruby
|
78
|
-
# require "google/cloud"
|
76
|
+
# require "google/cloud/storage"
|
79
77
|
#
|
80
|
-
#
|
81
|
-
# storage = gcloud.storage
|
78
|
+
# storage = Google::Cloud::Storage.new
|
82
79
|
#
|
83
80
|
# bucket = storage.bucket "bucket-name"
|
84
81
|
# file = bucket.file "path/to/audio.raw"
|
85
82
|
#
|
86
|
-
#
|
83
|
+
# require "google/cloud/speech"
|
84
|
+
#
|
85
|
+
# speech = Google::Cloud::Speech.new
|
87
86
|
#
|
88
87
|
# audio = speech.audio file, encoding: :raw, sample_rate: 16000
|
89
88
|
# ```
|
@@ -95,16 +94,15 @@ module Google
|
|
95
94
|
# recognition operation.
|
96
95
|
#
|
97
96
|
# Use {Speech::Audio#recognize} for synchronous speech recognition that
|
98
|
-
# returns {Result} objects only after all audio has been processed.
|
99
|
-
# method is limited to audio data of 1 minute or less in duration, and
|
100
|
-
# take roughly the same amount of time to process as the duration of
|
101
|
-
# supplied audio data.
|
97
|
+
# returns {Speech::Result} objects only after all audio has been processed.
|
98
|
+
# This method is limited to audio data of 1 minute or less in duration, and
|
99
|
+
# will take roughly the same amount of time to process as the duration of
|
100
|
+
# the supplied audio data.
|
102
101
|
#
|
103
102
|
# ```ruby
|
104
|
-
# require "google/cloud"
|
103
|
+
# require "google/cloud/speech"
|
105
104
|
#
|
106
|
-
#
|
107
|
-
# speech = gcloud.speech
|
105
|
+
# speech = Google::Cloud::Speech.new
|
108
106
|
#
|
109
107
|
# audio = speech.audio "path/to/audio.raw",
|
110
108
|
# encoding: :raw, sample_rate: 16000
|
@@ -112,19 +110,18 @@ module Google
|
|
112
110
|
#
|
113
111
|
# result = results.first
|
114
112
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
115
|
-
# result.confidence #=>
|
113
|
+
# result.confidence #=> 0.9826789498329163
|
116
114
|
# ```
|
117
115
|
#
|
118
116
|
# Use {Speech::Audio#recognize_job} for asynchronous speech recognition,
|
119
|
-
# in which a {Job} is returned immediately after the audio data has
|
120
|
-
# been sent. The job can be refreshed to retrieve {Result} objects
|
117
|
+
# in which a {Speech::Job} is returned immediately after the audio data has
|
118
|
+
# been sent. The job can be refreshed to retrieve {Speech::Result} objects
|
121
119
|
# once the audio data has been processed.
|
122
120
|
#
|
123
121
|
# ```ruby
|
124
|
-
# require "google/cloud"
|
122
|
+
# require "google/cloud/speech"
|
125
123
|
#
|
126
|
-
#
|
127
|
-
# speech = gcloud.speech
|
124
|
+
# speech = Google::Cloud::Speech.new
|
128
125
|
#
|
129
126
|
# audio = speech.audio "path/to/audio.raw",
|
130
127
|
# encoding: :raw, sample_rate: 16000
|
@@ -137,10 +134,91 @@ module Google
|
|
137
134
|
#
|
138
135
|
# result = results.first
|
139
136
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
140
|
-
# result.confidence #=>
|
137
|
+
# result.confidence #=> 0.9826789498329163
|
138
|
+
# ```
|
139
|
+
#
|
140
|
+
# Use {Speech::Project#stream} for streaming audio data for speech
|
141
|
+
# recognition, in which a {Speech::Stream} is returned. The stream object
|
142
|
+
# can receive results while sending audio by performing bidirectional
|
143
|
+
# streaming speech-recognition.
|
144
|
+
#
|
145
|
+
# ```ruby
|
146
|
+
# require "google/cloud/speech"
|
147
|
+
#
|
148
|
+
# speech = Google::Cloud::Speech.new
|
149
|
+
#
|
150
|
+
# audio = speech.audio "path/to/audio.raw"
|
151
|
+
#
|
152
|
+
# stream = audio.stream encoding: :raw, sample_rate: 16000
|
153
|
+
#
|
154
|
+
# # register callback for when a result is returned
|
155
|
+
# stream.on_result do |results|
|
156
|
+
# result = results.first
|
157
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
158
|
+
# result.confidence #=> 0.9826789498329163
|
159
|
+
# end
|
160
|
+
#
|
161
|
+
# # Stream 5 seconds of audio from the microphone
|
162
|
+
# # Actual implementation of microphone input varies by platform
|
163
|
+
# 5.times do
|
164
|
+
# stream.send MicrophoneInput.read(32000)
|
165
|
+
# end
|
166
|
+
#
|
167
|
+
# stream.stop
|
141
168
|
# ```
|
142
169
|
#
|
170
|
+
# Obtaining audio data from input sources such as a Microphone is outside
|
171
|
+
# the scope of this document.
|
172
|
+
#
|
143
173
|
module Speech
|
174
|
+
##
|
175
|
+
# Creates a new object for connecting to the Speech service.
|
176
|
+
# Each call creates a new connection.
|
177
|
+
#
|
178
|
+
# For more information on connecting to Google Cloud see the
|
179
|
+
# [Authentication
|
180
|
+
# Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
|
181
|
+
#
|
182
|
+
# @param [String] project Project identifier for the Speech service you
|
183
|
+
# are connecting to.
|
184
|
+
# @param [String, Hash] keyfile Keyfile downloaded from Google Cloud. If
|
185
|
+
# a file path, the file must be readable.
|
186
|
+
# @param [String, Array<String>] scope The OAuth 2.0 scopes controlling
|
187
|
+
# the set of resources and operations that the connection can access.
|
188
|
+
# See [Using OAuth 2.0 to Access Google
|
189
|
+
# APIs](https://developers.google.com/identity/protocols/OAuth2).
|
190
|
+
#
|
191
|
+
# The default scope is:
|
192
|
+
#
|
193
|
+
# * `https://www.googleapis.com/auth/speech`
|
194
|
+
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
195
|
+
# @param [Hash] client_config A hash of values to override the default
|
196
|
+
# behavior of the API client. Optional.
|
197
|
+
#
|
198
|
+
# @return [Google::Cloud::Speech::Project]
|
199
|
+
#
|
200
|
+
# @example
|
201
|
+
# require "google/cloud/speech"
|
202
|
+
#
|
203
|
+
# speech = Google::Cloud::Speech.new
|
204
|
+
#
|
205
|
+
# audio = speech.audio "path/to/audio.raw",
|
206
|
+
# encoding: :raw, sample_rate: 16000
|
207
|
+
#
|
208
|
+
def self.new project: nil, keyfile: nil, scope: nil, timeout: nil,
|
209
|
+
client_config: nil
|
210
|
+
project ||= Google::Cloud::Speech::Project.default_project
|
211
|
+
if keyfile.nil?
|
212
|
+
credentials = Google::Cloud::Speech::Credentials.default scope: scope
|
213
|
+
else
|
214
|
+
credentials = Google::Cloud::Speech::Credentials.new(
|
215
|
+
keyfile, scope: scope)
|
216
|
+
end
|
217
|
+
Google::Cloud::Speech::Project.new(
|
218
|
+
Google::Cloud::Speech::Service.new(
|
219
|
+
project, credentials, timeout: timeout,
|
220
|
+
client_config: client_config))
|
221
|
+
end
|
144
222
|
end
|
145
223
|
end
|
146
224
|
end
|
@@ -34,10 +34,9 @@ module Google
|
|
34
34
|
# Languages
|
35
35
|
#
|
36
36
|
# @example
|
37
|
-
# require "google/cloud"
|
37
|
+
# require "google/cloud/speech"
|
38
38
|
#
|
39
|
-
#
|
40
|
-
# speech = gcloud.speech
|
39
|
+
# speech = Google::Cloud::Speech.new
|
41
40
|
#
|
42
41
|
# audio = speech.audio "path/to/audio.raw",
|
43
42
|
# encoding: :raw, sample_rate: 16000
|
@@ -45,15 +44,80 @@ module Google
|
|
45
44
|
#
|
46
45
|
# result = results.first
|
47
46
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
48
|
-
# result.confidence #=>
|
47
|
+
# result.confidence #=> 0.9826789498329163
|
49
48
|
#
|
50
49
|
class Audio
|
51
50
|
# @private The V1beta1::RecognitionAudio object.
|
52
51
|
attr_reader :grpc
|
53
52
|
# @private The Project object.
|
54
53
|
attr_reader :speech
|
54
|
+
|
55
|
+
##
|
56
|
+
# Encoding of audio data to be recognized.
|
57
|
+
#
|
58
|
+
# Acceptable values are:
|
59
|
+
#
|
60
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
61
|
+
# (LINEAR16)
|
62
|
+
# * `flac` - The [Free Lossless Audio
|
63
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
64
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
65
|
+
# are supported. (FLAC)
|
66
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
67
|
+
# G.711 PCMU/mu-law. (MULAW)
|
68
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
69
|
+
# be 8000 Hz.) (AMR)
|
70
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
71
|
+
# be 16000 Hz.) (AMR_WB)
|
72
|
+
#
|
73
|
+
# @return [String,Symbol]
|
74
|
+
#
|
75
|
+
# @example
|
76
|
+
# require "google/cloud/speech"
|
77
|
+
#
|
78
|
+
# speech = Google::Cloud::Speech.new
|
79
|
+
#
|
80
|
+
# audio = speech.audio "path/to/audio.raw",
|
81
|
+
# encoding: :raw, sample_rate: 16000
|
82
|
+
#
|
55
83
|
attr_accessor :encoding
|
84
|
+
|
85
|
+
##
|
86
|
+
# Sample rate in Hertz of the audio data to be recognized. Valid values
|
87
|
+
# are: 8000-48000. 16000 is optimal. For best results, set the sampling
|
88
|
+
# rate of the audio source to 16000 Hz. If that's not possible, use the
|
89
|
+
# native sample rate of the audio source (instead of re-sampling).
|
90
|
+
#
|
91
|
+
# @return [Integer]
|
92
|
+
#
|
93
|
+
# @example
|
94
|
+
# require "google/cloud/speech"
|
95
|
+
#
|
96
|
+
# speech = Google::Cloud::Speech.new
|
97
|
+
#
|
98
|
+
# audio = speech.audio "path/to/audio.raw",
|
99
|
+
# encoding: :raw, sample_rate: 16000
|
100
|
+
#
|
56
101
|
attr_accessor :sample_rate
|
102
|
+
|
103
|
+
##
|
104
|
+
# The language of the supplied audio as a
|
105
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
|
106
|
+
# If not specified, the language defaults to "en-US". See [Language
|
107
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
108
|
+
# for a list of the currently supported language codes.
|
109
|
+
#
|
110
|
+
# @return [String,Symbol]
|
111
|
+
#
|
112
|
+
# @example
|
113
|
+
# require "google/cloud/speech"
|
114
|
+
#
|
115
|
+
# speech = Google::Cloud::Speech.new
|
116
|
+
#
|
117
|
+
# audio = speech.audio "path/to/audio.raw",
|
118
|
+
# encoding: :raw, sample_rate: 16000,
|
119
|
+
# language: :en
|
120
|
+
#
|
57
121
|
attr_accessor :language
|
58
122
|
|
59
123
|
##
|
@@ -108,10 +172,9 @@ module Google
|
|
108
172
|
# @return [Array<Result>] The transcribed text of audio recognized.
|
109
173
|
#
|
110
174
|
# @example
|
111
|
-
# require "google/cloud"
|
175
|
+
# require "google/cloud/speech"
|
112
176
|
#
|
113
|
-
#
|
114
|
-
# speech = gcloud.speech
|
177
|
+
# speech = Google::Cloud::Speech.new
|
115
178
|
#
|
116
179
|
# audio = speech.audio "path/to/audio.raw",
|
117
180
|
# encoding: :raw, sample_rate: 16000
|
@@ -119,7 +182,7 @@ module Google
|
|
119
182
|
#
|
120
183
|
# result = results.first
|
121
184
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
122
|
-
# result.confidence #=>
|
185
|
+
# result.confidence #=> 0.9826789498329163
|
123
186
|
#
|
124
187
|
def recognize max_alternatives: nil, profanity_filter: nil, phrases: nil
|
125
188
|
ensure_speech!
|
@@ -156,10 +219,9 @@ module Google
|
|
156
219
|
# processing of a speech-recognition operation.
|
157
220
|
#
|
158
221
|
# @example
|
159
|
-
# require "google/cloud"
|
222
|
+
# require "google/cloud/speech"
|
160
223
|
#
|
161
|
-
#
|
162
|
-
# speech = gcloud.speech
|
224
|
+
# speech = Google::Cloud::Speech.new
|
163
225
|
#
|
164
226
|
# audio = speech.audio "path/to/audio.raw",
|
165
227
|
# encoding: :raw, sample_rate: 16000
|
@@ -33,10 +33,9 @@ module Google
|
|
33
33
|
# Long-running Operation
|
34
34
|
#
|
35
35
|
# @example
|
36
|
-
# require "google/cloud"
|
36
|
+
# require "google/cloud/speech"
|
37
37
|
#
|
38
|
-
#
|
39
|
-
# speech = gcloud.speech
|
38
|
+
# speech = Google::Cloud::Speech.new
|
40
39
|
#
|
41
40
|
# job = speech.recognize_job "path/to/audio.raw",
|
42
41
|
# encoding: :raw, sample_rate: 16000
|
@@ -69,10 +68,9 @@ module Google
|
|
69
68
|
# the job is not done this will return `nil`.
|
70
69
|
#
|
71
70
|
# @example
|
72
|
-
# require "google/cloud"
|
71
|
+
# require "google/cloud/speech"
|
73
72
|
#
|
74
|
-
#
|
75
|
-
# speech = gcloud.speech
|
73
|
+
# speech = Google::Cloud::Speech.new
|
76
74
|
#
|
77
75
|
# job = speech.recognize_job "path/to/audio.raw",
|
78
76
|
# encoding: :raw, sample_rate: 16000
|
@@ -99,10 +97,9 @@ module Google
|
|
99
97
|
# @return [boolean] `true` when complete, `false` otherwise.
|
100
98
|
#
|
101
99
|
# @example
|
102
|
-
# require "google/cloud"
|
100
|
+
# require "google/cloud/speech"
|
103
101
|
#
|
104
|
-
#
|
105
|
-
# speech = gcloud.speech
|
102
|
+
# speech = Google::Cloud::Speech.new
|
106
103
|
#
|
107
104
|
# job = speech.recognize_job "path/to/audio.raw",
|
108
105
|
# encoding: :raw, sample_rate: 16000
|
@@ -118,10 +115,9 @@ module Google
|
|
118
115
|
# processing of a speech-recognition operation.
|
119
116
|
#
|
120
117
|
# @example
|
121
|
-
# require "google/cloud"
|
118
|
+
# require "google/cloud/speech"
|
122
119
|
#
|
123
|
-
#
|
124
|
-
# speech = gcloud.speech
|
120
|
+
# speech = Google::Cloud::Speech.new
|
125
121
|
#
|
126
122
|
# job = speech.recognize_job "path/to/audio.raw",
|
127
123
|
# encoding: :raw, sample_rate: 16000
|
@@ -141,10 +137,9 @@ module Google
|
|
141
137
|
# reloads will incrementally increase.
|
142
138
|
#
|
143
139
|
# @example
|
144
|
-
# require "google/cloud"
|
140
|
+
# require "google/cloud/speech"
|
145
141
|
#
|
146
|
-
#
|
147
|
-
# speech = gcloud.speech
|
142
|
+
# speech = Google::Cloud::Speech.new
|
148
143
|
#
|
149
144
|
# job = speech.recognize_job "path/to/audio.raw",
|
150
145
|
# encoding: :raw, sample_rate: 16000
|
@@ -14,11 +14,12 @@
|
|
14
14
|
|
15
15
|
|
16
16
|
require "google/cloud/errors"
|
17
|
-
require "google/cloud/core/
|
17
|
+
require "google/cloud/core/environment"
|
18
18
|
require "google/cloud/speech/service"
|
19
19
|
require "google/cloud/speech/audio"
|
20
20
|
require "google/cloud/speech/result"
|
21
21
|
require "google/cloud/speech/job"
|
22
|
+
require "google/cloud/speech/stream"
|
22
23
|
|
23
24
|
module Google
|
24
25
|
module Cloud
|
@@ -38,10 +39,9 @@ module Google
|
|
38
39
|
# See {Google::Cloud#speech}
|
39
40
|
#
|
40
41
|
# @example
|
41
|
-
# require "google/cloud"
|
42
|
+
# require "google/cloud/speech"
|
42
43
|
#
|
43
|
-
#
|
44
|
-
# speech = gcloud.speech
|
44
|
+
# speech = Google::Cloud::Speech.new
|
45
45
|
#
|
46
46
|
# audio = speech.audio "path/to/audio.raw",
|
47
47
|
# encoding: :raw, sample_rate: 16000
|
@@ -49,7 +49,7 @@ module Google
|
|
49
49
|
#
|
50
50
|
# result = results.first
|
51
51
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
52
|
-
# result.confidence #=>
|
52
|
+
# result.confidence #=> 0.9826789498329163
|
53
53
|
#
|
54
54
|
class Project
|
55
55
|
##
|
@@ -65,11 +65,12 @@ module Google
|
|
65
65
|
# The Speech project connected to.
|
66
66
|
#
|
67
67
|
# @example
|
68
|
-
# require "google/cloud"
|
68
|
+
# require "google/cloud/speech"
|
69
69
|
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
70
|
+
# speech = Google::Cloud::Speech.new(
|
71
|
+
# project: "my-project-id",
|
72
|
+
# keyfile: "/path/to/keyfile.json"
|
73
|
+
# )
|
73
74
|
#
|
74
75
|
# speech.project #=> "my-project-id"
|
75
76
|
#
|
@@ -83,7 +84,7 @@ module Google
|
|
83
84
|
ENV["SPEECH_PROJECT"] ||
|
84
85
|
ENV["GOOGLE_CLOUD_PROJECT"] ||
|
85
86
|
ENV["GCLOUD_PROJECT"] ||
|
86
|
-
Google::Cloud::Core::
|
87
|
+
Google::Cloud::Core::Environment.project_id
|
87
88
|
end
|
88
89
|
|
89
90
|
##
|
@@ -135,33 +136,32 @@ module Google
|
|
135
136
|
# @return [Audio] The audio file to be recognized.
|
136
137
|
#
|
137
138
|
# @example
|
138
|
-
# require "google/cloud"
|
139
|
+
# require "google/cloud/speech"
|
139
140
|
#
|
140
|
-
#
|
141
|
-
# speech = gcloud.speech
|
141
|
+
# speech = Google::Cloud::Speech.new
|
142
142
|
#
|
143
143
|
# audio = speech.audio "path/to/audio.raw",
|
144
144
|
# encoding: :raw, sample_rate: 16000
|
145
145
|
#
|
146
146
|
# @example With a Google Cloud Storage URI:
|
147
|
-
# require "google/cloud"
|
147
|
+
# require "google/cloud/speech"
|
148
148
|
#
|
149
|
-
#
|
150
|
-
# speech = gcloud.speech
|
149
|
+
# speech = Google::Cloud::Speech.new
|
151
150
|
#
|
152
151
|
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
153
152
|
# encoding: :raw, sample_rate: 16000
|
154
153
|
#
|
155
154
|
# @example With a Google Cloud Storage File object:
|
156
|
-
# require "google/cloud"
|
155
|
+
# require "google/cloud/storage"
|
157
156
|
#
|
158
|
-
#
|
159
|
-
# storage = gcloud.storage
|
157
|
+
# storage = Google::Cloud::Storage.new
|
160
158
|
#
|
161
159
|
# bucket = storage.bucket "bucket-name"
|
162
160
|
# file = bucket.file "path/to/audio.raw"
|
163
161
|
#
|
164
|
-
#
|
162
|
+
# require "google/cloud/speech"
|
163
|
+
#
|
164
|
+
# speech = Google::Cloud::Speech.new
|
165
165
|
#
|
166
166
|
# audio = speech.audio file, encoding: :raw, sample_rate: 16000
|
167
167
|
#
|
@@ -243,33 +243,32 @@ module Google
|
|
243
243
|
# @return [Array<Result>] The transcribed text of audio recognized.
|
244
244
|
#
|
245
245
|
# @example
|
246
|
-
# require "google/cloud"
|
246
|
+
# require "google/cloud/speech"
|
247
247
|
#
|
248
|
-
#
|
249
|
-
# speech = gcloud.speech
|
248
|
+
# speech = Google::Cloud::Speech.new
|
250
249
|
#
|
251
250
|
# results = speech.recognize "path/to/audio.raw",
|
252
251
|
# encoding: :raw, sample_rate: 16000
|
253
252
|
#
|
254
253
|
# @example With a Google Cloud Storage URI:
|
255
|
-
# require "google/cloud"
|
254
|
+
# require "google/cloud/speech"
|
256
255
|
#
|
257
|
-
#
|
258
|
-
# speech = gcloud.speech
|
256
|
+
# speech = Google::Cloud::Speech.new
|
259
257
|
#
|
260
258
|
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
261
259
|
# encoding: :raw, sample_rate: 16000
|
262
260
|
#
|
263
261
|
# @example With a Google Cloud Storage File object:
|
264
|
-
# require "google/cloud"
|
262
|
+
# require "google/cloud/storage"
|
265
263
|
#
|
266
|
-
#
|
267
|
-
# storage = gcloud.storage
|
264
|
+
# storage = Google::Cloud::Storage.new
|
268
265
|
#
|
269
266
|
# bucket = storage.bucket "bucket-name"
|
270
267
|
# file = bucket.file "path/to/audio.raw"
|
271
268
|
#
|
272
|
-
#
|
269
|
+
# require "google/cloud/speech"
|
270
|
+
#
|
271
|
+
# speech = Google::Cloud::Speech.new
|
273
272
|
#
|
274
273
|
# results = speech.recognize file, encoding: :raw,
|
275
274
|
# sample_rate: 16000,
|
@@ -342,10 +341,9 @@ module Google
|
|
342
341
|
# processing of a speech-recognition operation.
|
343
342
|
#
|
344
343
|
# @example
|
345
|
-
# require "google/cloud"
|
344
|
+
# require "google/cloud/speech"
|
346
345
|
#
|
347
|
-
#
|
348
|
-
# speech = gcloud.speech
|
346
|
+
# speech = Google::Cloud::Speech.new
|
349
347
|
#
|
350
348
|
# job = speech.recognize_job "path/to/audio.raw",
|
351
349
|
# encoding: :raw, sample_rate: 16000
|
@@ -354,10 +352,9 @@ module Google
|
|
354
352
|
# job.reload!
|
355
353
|
#
|
356
354
|
# @example With a Google Cloud Storage URI:
|
357
|
-
# require "google/cloud"
|
355
|
+
# require "google/cloud/speech"
|
358
356
|
#
|
359
|
-
#
|
360
|
-
# speech = gcloud.speech
|
357
|
+
# speech = Google::Cloud::Speech.new
|
361
358
|
#
|
362
359
|
# job = speech.recognize_job "gs://bucket-name/path/to/audio.raw",
|
363
360
|
# encoding: :raw, sample_rate: 16000
|
@@ -366,15 +363,16 @@ module Google
|
|
366
363
|
# job.reload!
|
367
364
|
#
|
368
365
|
# @example With a Google Cloud Storage File object:
|
369
|
-
# require "google/cloud"
|
366
|
+
# require "google/cloud/storage"
|
370
367
|
#
|
371
|
-
#
|
372
|
-
# storage = gcloud.storage
|
368
|
+
# storage = Google::Cloud::Storage.new
|
373
369
|
#
|
374
370
|
# bucket = storage.bucket "bucket-name"
|
375
371
|
# file = bucket.file "path/to/audio.raw"
|
376
372
|
#
|
377
|
-
#
|
373
|
+
# require "google/cloud/speech"
|
374
|
+
#
|
375
|
+
# speech = Google::Cloud::Speech.new
|
378
376
|
#
|
379
377
|
# job = speech.recognize_job file, encoding: :raw,
|
380
378
|
# sample_rate: 16000,
|
@@ -400,6 +398,110 @@ module Google
|
|
400
398
|
Job.from_grpc grpc, service
|
401
399
|
end
|
402
400
|
|
401
|
+
##
|
402
|
+
# Creates a Stream object to perform bidirectional streaming
|
403
|
+
# speech-recognition: receive results while sending audio.
|
404
|
+
#
|
405
|
+
# @see https://cloud.google.com/speech/docs/basics#streaming-recognition
|
406
|
+
# Streaming Speech API Recognition Requests
|
407
|
+
#
|
408
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
409
|
+
# recognized. Optional.
|
410
|
+
#
|
411
|
+
# Acceptable values are:
|
412
|
+
#
|
413
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
414
|
+
# (LINEAR16)
|
415
|
+
# * `flac` - The [Free Lossless Audio
|
416
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
417
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
418
|
+
# are supported. (FLAC)
|
419
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
420
|
+
# G.711 PCMU/mu-law. (MULAW)
|
421
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
422
|
+
# be 8000 Hz.) (AMR)
|
423
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
424
|
+
# be 16000 Hz.) (AMR_WB)
|
425
|
+
#
|
426
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
427
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
428
|
+
# For best results, set the sampling rate of the audio source to 16000
|
429
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
430
|
+
# source (instead of re-sampling). Optional.
|
431
|
+
# @param [String] language The language of the supplied audio as a
|
432
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
433
|
+
# code. If not specified, the language defaults to "en-US". See
|
434
|
+
# [Language
|
435
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
436
|
+
# for a list of the currently supported language codes. Optional.
|
437
|
+
# @param [Integer] max_alternatives The maximum number of recognition
|
438
|
+
# hypotheses to be returned. Default is 1. The service may return
|
439
|
+
# fewer. Valid values are 0-30. Optional.
|
440
|
+
# @param [Boolean] profanity_filter When `true`, the service will
|
441
|
+
# attempt to filter out profanities, replacing all but the initial
|
442
|
+
# character in each filtered word with asterisks, e.g. "f***". Default
|
443
|
+
# is `false`.
|
444
|
+
# @param [Array<String>] phrases A list of strings containing words and
|
445
|
+
# phrases "hints" so that the speech recognition is more likely to
|
446
|
+
# recognize them. See [usage
|
447
|
+
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
448
|
+
# @param [Boolean] utterance When `false` or omitted, the service will
|
449
|
+
# perform continuous recognition (continuing to process audio even if
|
450
|
+
# the user pauses speaking) until the client closes the output stream
|
451
|
+
# (gRPC API) or when the maximum time limit has been reached. When `true`,
|
452
|
+
# the service will detect a single spoken utterance. Default is `false`.
|
453
|
+
# @param [Boolean] interim When `true`, interim results (tentative
|
454
|
+
# hypotheses) may be returned as they become available. Default is
|
455
|
+
# `false`.
|
456
|
+
#
|
457
|
+
# @return [Stream] A resource that represents the streaming requests and
|
458
|
+
# responses.
|
459
|
+
#
|
460
|
+
# @example
|
461
|
+
# require "google/cloud/speech"
|
462
|
+
#
|
463
|
+
# speech = Google::Cloud::Speech.new
|
464
|
+
#
|
465
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
466
|
+
#
|
467
|
+
# # register callback for when a result is returned
|
468
|
+
# stream.on_result do |results|
|
469
|
+
# result = results.first
|
470
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
471
|
+
# puts result.confidence # 0.9826789498329163
|
472
|
+
# end
|
473
|
+
#
|
474
|
+
# # Stream 5 seconds of audio from the microphone
|
475
|
+
# # Actual implementation of microphone input varies by platform
|
476
|
+
# 5.times do
|
477
|
+
# stream.send MicrophoneInput.read(32000)
|
478
|
+
# end
|
479
|
+
#
|
480
|
+
# stream.stop
|
481
|
+
#
|
482
|
+
def stream encoding: nil, sample_rate: nil, language: nil,
|
483
|
+
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
484
|
+
utterance: nil, interim: nil
|
485
|
+
ensure_service!
|
486
|
+
|
487
|
+
grpc_req = V1beta1::StreamingRecognizeRequest.new(
|
488
|
+
streaming_config: V1beta1::StreamingRecognitionConfig.new(
|
489
|
+
{
|
490
|
+
config: audio_config(encoding: convert_encoding(encoding),
|
491
|
+
sample_rate: sample_rate,
|
492
|
+
language: language,
|
493
|
+
max_alternatives: max_alternatives,
|
494
|
+
profanity_filter: profanity_filter,
|
495
|
+
phrases: phrases),
|
496
|
+
single_utterance: utterance,
|
497
|
+
interim_results: interim
|
498
|
+
}.delete_if { |_, v| v.nil? }
|
499
|
+
)
|
500
|
+
)
|
501
|
+
|
502
|
+
Stream.new service, grpc_req
|
503
|
+
end
|
504
|
+
|
403
505
|
protected
|
404
506
|
|
405
507
|
def audio_config encoding: nil, sample_rate: nil, language: nil,
|