google-cloud-speech 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/google-cloud-speech.rb +117 -0
- data/lib/google/cloud/speech.rb +146 -0
- data/lib/google/cloud/speech/audio.rb +230 -0
- data/lib/google/cloud/speech/credentials.rb +32 -0
- data/lib/google/cloud/speech/job.rb +178 -0
- data/lib/google/cloud/speech/project.rb +435 -0
- data/lib/google/cloud/speech/result.rb +82 -0
- data/lib/google/cloud/speech/service.rb +107 -0
- data/lib/google/cloud/speech/v1beta1.rb +17 -0
- data/lib/google/cloud/speech/v1beta1/cloud_speech_pb.rb +116 -0
- data/lib/google/cloud/speech/v1beta1/cloud_speech_services_pb.rb +54 -0
- data/lib/google/cloud/speech/v1beta1/speech_api.rb +208 -0
- data/lib/google/cloud/speech/v1beta1/speech_client_config.json +43 -0
- data/lib/google/cloud/speech/version.rb +22 -0
- metadata +256 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/credentials"
|
17
|
+
require "google/cloud/speech/v1beta1"
|
18
|
+
|
19
|
+
module Google
|
20
|
+
module Cloud
|
21
|
+
module Speech
|
22
|
+
##
|
23
|
+
# @private Represents the OAuth 2.0 signing logic for Speech.
|
24
|
+
class Credentials < Google::Cloud::Credentials
|
25
|
+
SCOPE = V1beta1::SpeechApi::ALL_SCOPES
|
26
|
+
PATH_ENV_VARS = %w(SPEECH_KEYFILE GOOGLE_CLOUD_KEYFILE GCLOUD_KEYFILE)
|
27
|
+
JSON_ENV_VARS = %w(SPEECH_KEYFILE_JSON GOOGLE_CLOUD_KEYFILE_JSON
|
28
|
+
GCLOUD_KEYFILE_JSON)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/speech/v1beta1"
|
17
|
+
|
18
|
+
module Google
|
19
|
+
module Cloud
|
20
|
+
module Speech
|
21
|
+
##
|
22
|
+
# # Job
|
23
|
+
#
|
24
|
+
# A resource that represents the long-running, asynchronous processing of a
|
25
|
+
# speech-recognition operation. The job can be refreshed to retrieve
|
26
|
+
# recognition results once the audio data has been processed.
|
27
|
+
#
|
28
|
+
# See {Project#recognize_job} and {Audio#recognize_job}.
|
29
|
+
#
|
30
|
+
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
31
|
+
# Asynchronous Speech API Responses
|
32
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
|
33
|
+
# Long-running Operation
|
34
|
+
#
|
35
|
+
# @example
|
36
|
+
# require "google/cloud"
|
37
|
+
#
|
38
|
+
# gcloud = Google::Cloud.new
|
39
|
+
# speech = gcloud.speech
|
40
|
+
#
|
41
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
42
|
+
# encoding: :raw, sample_rate: 16000
|
43
|
+
#
|
44
|
+
# job.done? #=> false
|
45
|
+
# job.reload! # API call
|
46
|
+
# job.done? #=> true
|
47
|
+
# results = job.results
|
48
|
+
#
|
49
|
+
class Job
|
50
|
+
##
|
51
|
+
# @private The Google::Longrunning::Operation gRPC object.
|
52
|
+
attr_accessor :grpc
|
53
|
+
|
54
|
+
##
|
55
|
+
# @private The gRPC Service object.
|
56
|
+
attr_accessor :service
|
57
|
+
|
58
|
+
##
|
59
|
+
# @private Creates a new Job instance.
|
60
|
+
def initialize
|
61
|
+
@grpc = nil
|
62
|
+
@service = nil
|
63
|
+
end
|
64
|
+
|
65
|
+
##
|
66
|
+
# A speech recognition result corresponding to a portion of the audio.
|
67
|
+
#
|
68
|
+
# @return [Array<Result>] The transcribed text of audio recognized. If
|
69
|
+
# the job is not done this will return `nil`.
|
70
|
+
#
|
71
|
+
# @example
|
72
|
+
# require "google/cloud"
|
73
|
+
#
|
74
|
+
# gcloud = Google::Cloud.new
|
75
|
+
# speech = gcloud.speech
|
76
|
+
#
|
77
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
78
|
+
# encoding: :raw, sample_rate: 16000
|
79
|
+
#
|
80
|
+
# job.done? #=> true
|
81
|
+
# results = job.results
|
82
|
+
#
|
83
|
+
def results
|
84
|
+
return nil unless done?
|
85
|
+
return nil unless @grpc.result == :response
|
86
|
+
resp = V1beta1::AsyncRecognizeResponse.decode(@grpc.response.value)
|
87
|
+
resp.results.map do |result_grpc|
|
88
|
+
Result.from_grpc result_grpc
|
89
|
+
end
|
90
|
+
# TODO: Ensure we are raising the proper error
|
91
|
+
# TODO: Ensure GRPC behavior here, is an error already raised?
|
92
|
+
# raise @grpc.error
|
93
|
+
end
|
94
|
+
|
95
|
+
##
|
96
|
+
# Checks if the speech-recognition processing of the audio data is
|
97
|
+
# complete.
|
98
|
+
#
|
99
|
+
# @return [Boolean] `true` when complete, `false` otherwise.
|
100
|
+
#
|
101
|
+
# @example
|
102
|
+
# require "google/cloud"
|
103
|
+
#
|
104
|
+
# gcloud = Google::Cloud.new
|
105
|
+
# speech = gcloud.speech
|
106
|
+
#
|
107
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
108
|
+
# encoding: :raw, sample_rate: 16000
|
109
|
+
#
|
110
|
+
# job.done? #=> false
|
111
|
+
#
|
112
|
+
def done?
|
113
|
+
@grpc.done
|
114
|
+
end
|
115
|
+
|
116
|
+
##
|
117
|
+
# Reloads the job with current data from the long-running, asynchronous
|
118
|
+
# processing of a speech-recognition operation.
|
119
|
+
#
|
120
|
+
# @example
|
121
|
+
# require "google/cloud"
|
122
|
+
#
|
123
|
+
# gcloud = Google::Cloud.new
|
124
|
+
# speech = gcloud.speech
|
125
|
+
#
|
126
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
127
|
+
# encoding: :raw, sample_rate: 16000
|
128
|
+
#
|
129
|
+
# job.done? #=> false
|
130
|
+
# job.reload! # API call
|
131
|
+
# job.done? #=> true
|
132
|
+
#
|
133
|
+
def reload!
|
134
|
+
@grpc = @service.get_op @grpc.name
|
135
|
+
self
|
136
|
+
end
|
137
|
+
alias_method :refresh!, :reload!
|
138
|
+
|
139
|
+
##
|
140
|
+
# Reloads the job until the operation is complete. The delay between
|
141
|
+
# reloads will incrementally increase.
|
142
|
+
#
|
143
|
+
# @example
|
144
|
+
# require "google/cloud"
|
145
|
+
#
|
146
|
+
# gcloud = Google::Cloud.new
|
147
|
+
# speech = gcloud.speech
|
148
|
+
#
|
149
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
150
|
+
# encoding: :raw, sample_rate: 16000
|
151
|
+
#
|
152
|
+
# job.done? #=> false
|
153
|
+
# job.wait_until_done!
|
154
|
+
# job.done? #=> true
|
155
|
+
#
|
156
|
+
def wait_until_done!
|
157
|
+
backoff = ->(retries) { sleep 2 * retries + 5 }
|
158
|
+
retries = 0
|
159
|
+
until done?
|
160
|
+
backoff.call retries
|
161
|
+
retries += 1
|
162
|
+
reload!
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
##
|
167
|
+
# @private New Job from a Google::Longrunning::Operation
|
168
|
+
# object.
|
169
|
+
def self.from_grpc grpc, service
|
170
|
+
new.tap do |job|
|
171
|
+
job.instance_variable_set :@grpc, grpc
|
172
|
+
job.instance_variable_set :@service, service
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
@@ -0,0 +1,435 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/errors"
|
17
|
+
require "google/cloud/core/gce"
|
18
|
+
require "google/cloud/speech/service"
|
19
|
+
require "google/cloud/speech/audio"
|
20
|
+
require "google/cloud/speech/result"
|
21
|
+
require "google/cloud/speech/job"
|
22
|
+
|
23
|
+
module Google
|
24
|
+
module Cloud
|
25
|
+
module Speech
|
26
|
+
##
|
27
|
+
# # Project
|
28
|
+
#
|
29
|
+
# The Google Cloud Speech API enables developers to convert audio to text
|
30
|
+
# by applying powerful neural network models. The API recognizes over 80
|
31
|
+
# languages and variants, to support your global user base. You can
|
32
|
+
# transcribe the text of users dictating to an application's microphone,
|
33
|
+
# enable command-and-control through voice, or transcribe audio files,
|
34
|
+
# among many other use cases. Recognize audio uploaded in the request, and
|
35
|
+
# integrate with your audio storage on Google Cloud Storage, by using the
|
36
|
+
# same technology Google uses to power its own products.
|
37
|
+
#
|
38
|
+
# See {Google::Cloud#speech}
|
39
|
+
#
|
40
|
+
# @example
|
41
|
+
# require "google/cloud"
|
42
|
+
#
|
43
|
+
# gcloud = Google::Cloud.new
|
44
|
+
# speech = gcloud.speech
|
45
|
+
#
|
46
|
+
# audio = speech.audio "path/to/audio.raw",
|
47
|
+
# encoding: :raw, sample_rate: 16000
|
48
|
+
# results = audio.recognize
|
49
|
+
#
|
50
|
+
# result = results.first
|
51
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
52
|
+
# result.confidence #=> 88.15
|
53
|
+
#
|
54
|
+
class Project
|
55
|
+
##
|
56
|
+
# @private The gRPC Service object.
|
57
|
+
attr_accessor :service
|
58
|
+
|
59
|
+
##
|
60
|
+
# @private Creates a new Speech Project instance.
|
61
|
+
def initialize service
|
62
|
+
@service = service
|
63
|
+
end
|
64
|
+
|
65
|
+
# The ID of the project this Speech client is connected to.
|
66
|
+
#
|
67
|
+
# @example
|
68
|
+
# require "google/cloud"
|
69
|
+
#
|
70
|
+
# gcloud = Google::Cloud.new "my-project-id",
|
71
|
+
# "/path/to/keyfile.json"
|
72
|
+
# speech = gcloud.speech
|
73
|
+
#
|
74
|
+
# speech.project #=> "my-project-id"
|
75
|
+
#
|
76
|
+
def project
|
77
|
+
service.project
|
78
|
+
end
|
79
|
+
|
80
|
+
##
|
81
|
+
# @private Default project.
|
82
|
+
def self.default_project
|
83
|
+
ENV["SPEECH_PROJECT"] ||
|
84
|
+
ENV["GOOGLE_CLOUD_PROJECT"] ||
|
85
|
+
ENV["GCLOUD_PROJECT"] ||
|
86
|
+
Google::Cloud::Core::GCE.project_id
|
87
|
+
end
|
88
|
+
|
89
|
+
##
|
90
|
+
# Returns a new Audio instance from the given source. No API call is
|
91
|
+
# made.
|
92
|
+
#
|
93
|
+
# @see https://cloud.google.com/speech/docs/basics#audio-encodings
|
94
|
+
# Audio Encodings
|
95
|
+
# @see https://cloud.google.com/speech/docs/basics#sample-rates
|
96
|
+
# Sample Rates
|
97
|
+
# @see https://cloud.google.com/speech/docs/basics#languages
|
98
|
+
# Languages
|
99
|
+
#
|
100
|
+
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
101
|
+
# the path to the audio file to be recognized, or a File or other IO
|
102
|
+
# object of the audio contents, or a Cloud Storage URI of the form
|
103
|
+
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
104
|
+
# Google::Cloud::Storage::File containing the audio to be recognized.
|
105
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
106
|
+
# recognized. Optional.
|
107
|
+
#
|
108
|
+
# Acceptable values are:
|
109
|
+
#
|
110
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
111
|
+
# (LINEAR16)
|
112
|
+
# * `flac` - The [Free Lossless Audio
|
113
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
114
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
115
|
+
# are supported. (FLAC)
|
116
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
117
|
+
# G.711 PCMU/mu-law. (MULAW)
|
118
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
119
|
+
# be 8000 Hz.) (AMR)
|
120
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
121
|
+
# be 16000 Hz.) (AMR_WB)
|
122
|
+
#
|
123
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
124
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
125
|
+
# For best results, set the sampling rate of the audio source to 16000
|
126
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
127
|
+
# source (instead of re-sampling). Optional.
|
128
|
+
# @param [String] language The language of the supplied audio as a
|
129
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
130
|
+
# code. If not specified, the language defaults to "en-US". See
|
131
|
+
# [Language
|
132
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
133
|
+
# for a list of the currently supported language codes. Optional.
|
134
|
+
#
|
135
|
+
# @return [Audio] The audio file to be recognized.
|
136
|
+
#
|
137
|
+
# @example
|
138
|
+
# require "google/cloud"
|
139
|
+
#
|
140
|
+
# gcloud = Google::Cloud.new
|
141
|
+
# speech = gcloud.speech
|
142
|
+
#
|
143
|
+
# audio = speech.audio "path/to/audio.raw",
|
144
|
+
# encoding: :raw, sample_rate: 16000
|
145
|
+
#
|
146
|
+
# @example With a Google Cloud Storage URI:
|
147
|
+
# require "google/cloud"
|
148
|
+
#
|
149
|
+
# gcloud = Google::Cloud.new
|
150
|
+
# speech = gcloud.speech
|
151
|
+
#
|
152
|
+
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
153
|
+
# encoding: :raw, sample_rate: 16000
|
154
|
+
#
|
155
|
+
# @example With a Google Cloud Storage File object:
|
156
|
+
# require "google/cloud"
|
157
|
+
#
|
158
|
+
# gcloud = Google::Cloud.new
|
159
|
+
# storage = gcloud.storage
|
160
|
+
#
|
161
|
+
# bucket = storage.bucket "bucket-name"
|
162
|
+
# file = bucket.file "path/to/audio.raw"
|
163
|
+
#
|
164
|
+
# speech = gcloud.speech
|
165
|
+
#
|
166
|
+
# audio = speech.audio file, encoding: :raw, sample_rate: 16000
|
167
|
+
#
|
168
|
+
def audio source, encoding: nil, sample_rate: nil, language: nil
|
169
|
+
if source.is_a? Audio
|
170
|
+
audio = source.dup
|
171
|
+
else
|
172
|
+
audio = Audio.from_source source, self
|
173
|
+
end
|
174
|
+
audio.encoding = encoding unless encoding.nil?
|
175
|
+
audio.sample_rate = sample_rate unless sample_rate.nil?
|
176
|
+
audio.language = language unless language.nil?
|
177
|
+
audio
|
178
|
+
end
|
179
|
+
|
180
|
+
##
|
181
|
+
# Performs synchronous speech recognition. Sends audio data to the
|
182
|
+
# Speech API, which performs recognition on that data, and returns
|
183
|
+
# results only after all audio has been processed. Limited to audio data
|
184
|
+
# of 1 minute or less in duration.
|
185
|
+
#
|
186
|
+
# The Speech API will take roughly the same amount of time to process
|
187
|
+
# audio data sent synchronously as the duration of the supplied audio
|
188
|
+
# data. That is, if you send audio data of 30 seconds in length, expect
|
189
|
+
# the synchronous request to take approximately 30 seconds to return
|
190
|
+
# results.
|
191
|
+
#
|
192
|
+
# @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
|
193
|
+
# Synchronous Speech API Recognition
|
194
|
+
# @see https://cloud.google.com/speech/docs/basics#phrase-hints
|
195
|
+
# Phrase Hints
|
196
|
+
#
|
197
|
+
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
198
|
+
# the path to the audio file to be recognized, or a File or other IO
|
199
|
+
# object of the audio contents, or a Cloud Storage URI of the form
|
200
|
+
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
201
|
+
# Google::Cloud::Storage::File containing the audio to be recognized.
|
202
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
203
|
+
# recognized. Optional.
|
204
|
+
#
|
205
|
+
# Acceptable values are:
|
206
|
+
#
|
207
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
208
|
+
# (LINEAR16)
|
209
|
+
# * `flac` - The [Free Lossless Audio
|
210
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
211
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
212
|
+
# are supported. (FLAC)
|
213
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
214
|
+
# G.711 PCMU/mu-law. (MULAW)
|
215
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
216
|
+
# be 8000 Hz.) (AMR)
|
217
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
218
|
+
# be 16000 Hz.) (AMR_WB)
|
219
|
+
#
|
220
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
221
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
222
|
+
# For best results, set the sampling rate of the audio source to 16000
|
223
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
224
|
+
# source (instead of re-sampling). Optional.
|
225
|
+
# @param [String] language The language of the supplied audio as a
|
226
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
227
|
+
# code. If not specified, the language defaults to "en-US". See
|
228
|
+
# [Language
|
229
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
230
|
+
# for a list of the currently supported language codes. Optional.
|
231
|
+
# @param [Integer] max_alternatives The maximum number of recognition
|
232
|
+
# hypotheses to be returned. Default is 1. The service may return
|
233
|
+
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
234
|
+
# @param [Boolean] profanity_filter When `true`, the service will
|
235
|
+
# attempt to filter out profanities, replacing all but the initial
|
236
|
+
# character in each filtered word with asterisks, e.g. "f***". Default
|
237
|
+
# is `false`.
|
238
|
+
# @param [Array<String>] phrases A list of strings containing words and
|
239
|
+
# phrases "hints" so that the speech recognition is more likely to
|
240
|
+
# recognize them. See [usage
|
241
|
+
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
242
|
+
#
|
243
|
+
# @return [Array<Result>] The transcribed text of audio recognized.
|
244
|
+
#
|
245
|
+
# @example
|
246
|
+
# require "google/cloud"
|
247
|
+
#
|
248
|
+
# gcloud = Google::Cloud.new
|
249
|
+
# speech = gcloud.speech
|
250
|
+
#
|
251
|
+
# results = speech.recognize "path/to/audio.raw",
|
252
|
+
# encoding: :raw, sample_rate: 16000
|
253
|
+
#
|
254
|
+
# @example With a Google Cloud Storage URI:
|
255
|
+
# require "google/cloud"
|
256
|
+
#
|
257
|
+
# gcloud = Google::Cloud.new
|
258
|
+
# speech = gcloud.speech
|
259
|
+
#
|
260
|
+
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
261
|
+
# encoding: :raw, sample_rate: 16000
|
262
|
+
#
|
263
|
+
# @example With a Google Cloud Storage File object:
|
264
|
+
# require "google/cloud"
|
265
|
+
#
|
266
|
+
# gcloud = Google::Cloud.new
|
267
|
+
# storage = gcloud.storage
|
268
|
+
#
|
269
|
+
# bucket = storage.bucket "bucket-name"
|
270
|
+
# file = bucket.file "path/to/audio.raw"
|
271
|
+
#
|
272
|
+
# speech = gcloud.speech
|
273
|
+
#
|
274
|
+
# results = speech.recognize file, encoding: :raw,
|
275
|
+
# sample_rate: 16000,
|
276
|
+
# max_alternatives: 10
|
277
|
+
#
|
278
|
+
def recognize source, encoding: nil, sample_rate: nil, language: nil,
|
279
|
+
max_alternatives: nil, profanity_filter: nil, phrases: nil
|
280
|
+
ensure_service!
|
281
|
+
|
282
|
+
audio_obj = audio source, encoding: encoding,
|
283
|
+
sample_rate: sample_rate, language: language
|
284
|
+
|
285
|
+
config = audio_config(
|
286
|
+
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
287
|
+
language: audio_obj.language, max_alternatives: max_alternatives,
|
288
|
+
profanity_filter: profanity_filter, phrases: phrases)
|
289
|
+
|
290
|
+
grpc = service.recognize_sync audio_obj.to_grpc, config
|
291
|
+
grpc.results.map do |result_grpc|
|
292
|
+
Result.from_grpc result_grpc
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
##
|
297
|
+
# Performs asynchronous speech recognition. Requests are processed
|
298
|
+
# asynchronously, meaning a Job is returned once the audio data has been
|
299
|
+
# sent, and can be refreshed to retrieve recognition results once the
|
300
|
+
# audio data has been processed.
|
301
|
+
#
|
302
|
+
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
303
|
+
# Asynchronous Speech API Responses
|
304
|
+
#
|
305
|
+
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
306
|
+
# the path to the audio file to be recognized, or a File or other IO
|
307
|
+
# object of the audio contents, or a Cloud Storage URI of the form
|
308
|
+
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
309
|
+
# Google::Cloud::Storage::File containing the audio to be recognized.
|
310
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
311
|
+
# recognized. Optional.
|
312
|
+
#
|
313
|
+
# Currently, the only acceptable value is:
|
314
|
+
#
|
315
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
316
|
+
# (LINEAR16)
|
317
|
+
#
|
318
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
319
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
320
|
+
# For best results, set the sampling rate of the audio source to 16000
|
321
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
322
|
+
# source (instead of re-sampling). Optional.
|
323
|
+
# @param [String] language The language of the supplied audio as a
|
324
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
325
|
+
# code. If not specified, the language defaults to "en-US". See
|
326
|
+
# [Language
|
327
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
328
|
+
# for a list of the currently supported language codes. Optional.
|
329
|
+
# @param [Integer] max_alternatives The maximum number of recognition
|
330
|
+
# hypotheses to be returned. Default is 1. The service may return
|
331
|
+
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
332
|
+
# @param [Boolean] profanity_filter When `true`, the service will
|
333
|
+
# attempt to filter out profanities, replacing all but the initial
|
334
|
+
# character in each filtered word with asterisks, e.g. "f***". Default
|
335
|
+
# is `false`.
|
336
|
+
# @param [Array<String>] phrases A list of strings containing words and
|
337
|
+
# phrases "hints" so that the speech recognition is more likely to
|
338
|
+
# recognize them. See [usage
|
339
|
+
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
340
|
+
#
|
341
|
+
# @return [Job] A resource that represents the long-running, asynchronous
|
342
|
+
# processing of a speech-recognition operation.
|
343
|
+
#
|
344
|
+
# @example
|
345
|
+
# require "google/cloud"
|
346
|
+
#
|
347
|
+
# gcloud = Google::Cloud.new
|
348
|
+
# speech = gcloud.speech
|
349
|
+
#
|
350
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
351
|
+
# encoding: :raw, sample_rate: 16000
|
352
|
+
#
|
353
|
+
# job.done? #=> false
|
354
|
+
# job.reload!
|
355
|
+
#
|
356
|
+
# @example With a Google Cloud Storage URI:
|
357
|
+
# require "google/cloud"
|
358
|
+
#
|
359
|
+
# gcloud = Google::Cloud.new
|
360
|
+
# speech = gcloud.speech
|
361
|
+
#
|
362
|
+
# job = speech.recognize_job "gs://bucket-name/path/to/audio.raw",
|
363
|
+
# encoding: :raw, sample_rate: 16000
|
364
|
+
#
|
365
|
+
# job.done? #=> false
|
366
|
+
# job.reload!
|
367
|
+
#
|
368
|
+
# @example With a Google Cloud Storage File object:
|
369
|
+
# require "google/cloud"
|
370
|
+
#
|
371
|
+
# gcloud = Google::Cloud.new
|
372
|
+
# storage = gcloud.storage
|
373
|
+
#
|
374
|
+
# bucket = storage.bucket "bucket-name"
|
375
|
+
# file = bucket.file "path/to/audio.raw"
|
376
|
+
#
|
377
|
+
# speech = gcloud.speech
|
378
|
+
#
|
379
|
+
# job = speech.recognize_job file, encoding: :raw,
|
380
|
+
# sample_rate: 16000,
|
381
|
+
# max_alternatives: 10
|
382
|
+
#
|
383
|
+
# job.done? #=> false
|
384
|
+
# job.reload!
|
385
|
+
#
|
386
|
+
def recognize_job source, encoding: nil, sample_rate: nil,
|
387
|
+
language: nil, max_alternatives: nil,
|
388
|
+
profanity_filter: nil, phrases: nil
|
389
|
+
ensure_service!
|
390
|
+
|
391
|
+
audio_obj = audio source, encoding: encoding,
|
392
|
+
sample_rate: sample_rate, language: language
|
393
|
+
|
394
|
+
config = audio_config(
|
395
|
+
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
396
|
+
language: audio_obj.language, max_alternatives: max_alternatives,
|
397
|
+
profanity_filter: profanity_filter, phrases: phrases)
|
398
|
+
|
399
|
+
grpc = service.recognize_async audio_obj.to_grpc, config
|
400
|
+
Job.from_grpc grpc, service
|
401
|
+
end
|
402
|
+
|
403
|
+
protected
|
404
|
+
|
405
|
+
def audio_config encoding: nil, sample_rate: nil, language: nil,
|
406
|
+
max_alternatives: nil, profanity_filter: nil,
|
407
|
+
phrases: nil
|
408
|
+
context = nil
|
409
|
+
context = V1beta1::SpeechContext.new(phrases: phrases) if phrases
|
410
|
+
V1beta1::RecognitionConfig.new({
|
411
|
+
encoding: convert_encoding(encoding),
|
412
|
+
sample_rate: sample_rate,
|
413
|
+
language_code: language,
|
414
|
+
max_alternatives: max_alternatives,
|
415
|
+
profanity_filter: profanity_filter,
|
416
|
+
speech_context: context
|
417
|
+
}.delete_if { |_, v| v.nil? })
|
418
|
+
end
|
419
|
+
|
420
|
+
def convert_encoding encoding
|
421
|
+
mapping = { raw: :LINEAR16, linear: :LINEAR16, linear16: :LINEAR16,
|
422
|
+
flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB }
|
423
|
+
mapping[encoding] || encoding
|
424
|
+
end
|
425
|
+
|
426
|
+
##
|
427
|
+
# @private Raise an error unless an active connection to the service is
|
428
|
+
# available.
|
429
|
+
def ensure_service!
|
430
|
+
fail "Must have active connection to service" unless service
|
431
|
+
end
|
432
|
+
end
|
433
|
+
end
|
434
|
+
end
|
435
|
+
end
|