google-cloud-speech 0.20.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/google-cloud-speech.rb +117 -0
- data/lib/google/cloud/speech.rb +146 -0
- data/lib/google/cloud/speech/audio.rb +230 -0
- data/lib/google/cloud/speech/credentials.rb +32 -0
- data/lib/google/cloud/speech/job.rb +178 -0
- data/lib/google/cloud/speech/project.rb +435 -0
- data/lib/google/cloud/speech/result.rb +82 -0
- data/lib/google/cloud/speech/service.rb +107 -0
- data/lib/google/cloud/speech/v1beta1.rb +17 -0
- data/lib/google/cloud/speech/v1beta1/cloud_speech_pb.rb +116 -0
- data/lib/google/cloud/speech/v1beta1/cloud_speech_services_pb.rb +54 -0
- data/lib/google/cloud/speech/v1beta1/speech_api.rb +208 -0
- data/lib/google/cloud/speech/v1beta1/speech_client_config.json +43 -0
- data/lib/google/cloud/speech/version.rb +22 -0
- metadata +256 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/credentials"
|
17
|
+
require "google/cloud/speech/v1beta1"
|
18
|
+
|
19
|
+
module Google
|
20
|
+
module Cloud
|
21
|
+
module Speech
|
22
|
+
##
|
23
|
+
# @private Represents the OAuth 2.0 signing logic for Speech.
|
24
|
+
class Credentials < Google::Cloud::Credentials
|
25
|
+
SCOPE = V1beta1::SpeechApi::ALL_SCOPES
|
26
|
+
PATH_ENV_VARS = %w(SPEECH_KEYFILE GOOGLE_CLOUD_KEYFILE GCLOUD_KEYFILE)
|
27
|
+
JSON_ENV_VARS = %w(SPEECH_KEYFILE_JSON GOOGLE_CLOUD_KEYFILE_JSON
|
28
|
+
GCLOUD_KEYFILE_JSON)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/speech/v1beta1"
|
17
|
+
|
18
|
+
module Google
|
19
|
+
module Cloud
|
20
|
+
module Speech
|
21
|
+
##
|
22
|
+
# # Job
|
23
|
+
#
|
24
|
+
# A resource representing the long-running, asynchronous processing of a
|
25
|
+
# speech-recognition operation. The job can be refreshed to retrieve
|
26
|
+
# recognition results once the audio data has been processed.
|
27
|
+
#
|
28
|
+
# See {Project#recognize_job} and {Audio#recognize_job}.
|
29
|
+
#
|
30
|
+
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
31
|
+
# Asynchronous Speech API Responses
|
32
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
|
33
|
+
# Long-running Operation
|
34
|
+
#
|
35
|
+
# @example
|
36
|
+
# require "google/cloud"
|
37
|
+
#
|
38
|
+
# gcloud = Google::Cloud.new
|
39
|
+
# speech = gcloud.speech
|
40
|
+
#
|
41
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
42
|
+
# encoding: :raw, sample_rate: 16000
|
43
|
+
#
|
44
|
+
# job.done? #=> false
|
45
|
+
# job.reload! # API call
|
46
|
+
# job.done? #=> true
|
47
|
+
# results = job.results
|
48
|
+
#
|
49
|
+
class Job
|
50
|
+
##
|
51
|
+
# @private The Google::Longrunning::Operation gRPC object.
|
52
|
+
attr_accessor :grpc
|
53
|
+
|
54
|
+
##
|
55
|
+
# @private The gRPC Service object.
|
56
|
+
attr_accessor :service
|
57
|
+
|
58
|
+
##
|
59
|
+
# @private Creates a new Job instance.
|
60
|
+
def initialize
|
61
|
+
@grpc = nil
|
62
|
+
@service = nil
|
63
|
+
end
|
64
|
+
|
65
|
+
##
|
66
|
+
# A speech recognition result corresponding to a portion of the audio.
|
67
|
+
#
|
68
|
+
# @return [Array<Result>] The transcribed text of audio recognized. If
|
69
|
+
# the job is not done this will return `nil`.
|
70
|
+
#
|
71
|
+
# @example
|
72
|
+
# require "google/cloud"
|
73
|
+
#
|
74
|
+
# gcloud = Google::Cloud.new
|
75
|
+
# speech = gcloud.speech
|
76
|
+
#
|
77
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
78
|
+
# encoding: :raw, sample_rate: 16000
|
79
|
+
#
|
80
|
+
# job.done? #=> true
|
81
|
+
# results = job.results
|
82
|
+
#
|
83
|
+
def results
|
84
|
+
return nil unless done?
|
85
|
+
return nil unless @grpc.result == :response
|
86
|
+
resp = V1beta1::AsyncRecognizeResponse.decode(@grpc.response.value)
|
87
|
+
resp.results.map do |result_grpc|
|
88
|
+
Result.from_grpc result_grpc
|
89
|
+
end
|
90
|
+
# TODO: Ensure we are raising the proper error
|
91
|
+
# TODO: Ensure GRPC behavior here, is an error already raised?
|
92
|
+
# raise @grpc.error
|
93
|
+
end
|
94
|
+
|
95
|
+
##
|
96
|
+
# Checks if the speech-recognition processing of the audio data is
|
97
|
+
# complete.
|
98
|
+
#
|
99
|
+
# @return [Boolean] `true` when complete, `false` otherwise.
|
100
|
+
#
|
101
|
+
# @example
|
102
|
+
# require "google/cloud"
|
103
|
+
#
|
104
|
+
# gcloud = Google::Cloud.new
|
105
|
+
# speech = gcloud.speech
|
106
|
+
#
|
107
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
108
|
+
# encoding: :raw, sample_rate: 16000
|
109
|
+
#
|
110
|
+
# job.done? #=> false
|
111
|
+
#
|
112
|
+
def done?
|
113
|
+
@grpc.done
|
114
|
+
end
|
115
|
+
|
116
|
+
##
|
117
|
+
# Reloads the job with current data from the long-running, asynchronous
|
118
|
+
# processing of a speech-recognition operation.
|
119
|
+
#
|
120
|
+
# @example
|
121
|
+
# require "google/cloud"
|
122
|
+
#
|
123
|
+
# gcloud = Google::Cloud.new
|
124
|
+
# speech = gcloud.speech
|
125
|
+
#
|
126
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
127
|
+
# encoding: :raw, sample_rate: 16000
|
128
|
+
#
|
129
|
+
# job.done? #=> false
|
130
|
+
# job.reload! # API call
|
131
|
+
# job.done? #=> true
|
132
|
+
#
|
133
|
+
def reload!
|
134
|
+
@grpc = @service.get_op @grpc.name
|
135
|
+
self
|
136
|
+
end
|
137
|
+
alias_method :refresh!, :reload!
|
138
|
+
|
139
|
+
##
|
140
|
+
# Reloads the job until the operation is complete. The delay between
|
141
|
+
# reloads will incrementally increase.
|
142
|
+
#
|
143
|
+
# @example
|
144
|
+
# require "google/cloud"
|
145
|
+
#
|
146
|
+
# gcloud = Google::Cloud.new
|
147
|
+
# speech = gcloud.speech
|
148
|
+
#
|
149
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
150
|
+
# encoding: :raw, sample_rate: 16000
|
151
|
+
#
|
152
|
+
# job.done? #=> false
|
153
|
+
# job.wait_until_done!
|
154
|
+
# job.done? #=> true
|
155
|
+
#
|
156
|
+
def wait_until_done!
|
157
|
+
backoff = ->(retries) { sleep 2 * retries + 5 }
|
158
|
+
retries = 0
|
159
|
+
until done?
|
160
|
+
backoff.call retries
|
161
|
+
retries += 1
|
162
|
+
reload!
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
##
|
167
|
+
# @private New Speech::Job from a Google::Longrunning::Operation
|
168
|
+
# object.
|
169
|
+
def self.from_grpc grpc, service
|
170
|
+
new.tap do |job|
|
171
|
+
job.instance_variable_set :@grpc, grpc
|
172
|
+
job.instance_variable_set :@service, service
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
@@ -0,0 +1,435 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/errors"
|
17
|
+
require "google/cloud/core/gce"
|
18
|
+
require "google/cloud/speech/service"
|
19
|
+
require "google/cloud/speech/audio"
|
20
|
+
require "google/cloud/speech/result"
|
21
|
+
require "google/cloud/speech/job"
|
22
|
+
|
23
|
+
module Google
|
24
|
+
module Cloud
|
25
|
+
module Speech
|
26
|
+
##
|
27
|
+
# # Project
|
28
|
+
#
|
29
|
+
# The Google Cloud Speech API enables developers to convert audio to text
|
30
|
+
# by applying powerful neural network models. The API recognizes over 80
|
31
|
+
# languages and variants, to support your global user base. You can
|
32
|
+
# transcribe the text of users dictating to an application's microphone,
|
33
|
+
# enable command-and-control through voice, or transcribe audio files,
|
34
|
+
# among many other use cases. Recognize audio uploaded in the request, and
|
35
|
+
# integrate with your audio storage on Google Cloud Storage, by using the
|
36
|
+
# same technology Google uses to power its own products.
|
37
|
+
#
|
38
|
+
# See {Google::Cloud#speech}
|
39
|
+
#
|
40
|
+
# @example
|
41
|
+
# require "google/cloud"
|
42
|
+
#
|
43
|
+
# gcloud = Google::Cloud.new
|
44
|
+
# speech = gcloud.speech
|
45
|
+
#
|
46
|
+
# audio = speech.audio "path/to/audio.raw",
|
47
|
+
# encoding: :raw, sample_rate: 16000
|
48
|
+
# results = audio.recognize
|
49
|
+
#
|
50
|
+
# result = results.first
|
51
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
52
|
+
# result.confidence #=> 88.15
|
53
|
+
#
|
54
|
+
class Project
|
55
|
+
##
|
56
|
+
# @private The gRPC Service object.
|
57
|
+
attr_accessor :service
|
58
|
+
|
59
|
+
##
|
60
|
+
# @private Creates a new Speech Project instance.
|
61
|
+
def initialize service
|
62
|
+
@service = service
|
63
|
+
end
|
64
|
+
|
65
|
+
# The Speech project connected to.
|
66
|
+
#
|
67
|
+
# @example
|
68
|
+
# require "google/cloud"
|
69
|
+
#
|
70
|
+
# gcloud = Google::Cloud.new "my-project-id",
|
71
|
+
# "/path/to/keyfile.json"
|
72
|
+
# speech = gcloud.speech
|
73
|
+
#
|
74
|
+
# speech.project #=> "my-project-id"
|
75
|
+
#
|
76
|
+
def project
|
77
|
+
service.project
|
78
|
+
end
|
79
|
+
|
80
|
+
##
|
81
|
+
# @private Default project.
|
82
|
+
def self.default_project
|
83
|
+
ENV["SPEECH_PROJECT"] ||
|
84
|
+
ENV["GOOGLE_CLOUD_PROJECT"] ||
|
85
|
+
ENV["GCLOUD_PROJECT"] ||
|
86
|
+
Google::Cloud::Core::GCE.project_id
|
87
|
+
end
|
88
|
+
|
89
|
+
##
|
90
|
+
# Returns a new Audio instance from the given source. No API call is
|
91
|
+
# made.
|
92
|
+
#
|
93
|
+
# @see https://cloud.google.com/speech/docs/basics#audio-encodings
|
94
|
+
# Audio Encodings
|
95
|
+
# @see https://cloud.google.com/speech/docs/basics#sample-rates
|
96
|
+
# Sample Rates
|
97
|
+
# @see https://cloud.google.com/speech/docs/basics#languages
|
98
|
+
# Languages
|
99
|
+
#
|
100
|
+
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
101
|
+
# the path to the audio file to be recognized, or a File or other IO
|
102
|
+
# object of the audio contents, or a Cloud Storage URI of the form
|
103
|
+
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
104
|
+
# Google::Cloud::Storage::File of the audio to be recognized.
|
105
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
106
|
+
# recognized. Optional.
|
107
|
+
#
|
108
|
+
# Acceptable values are:
|
109
|
+
#
|
110
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
111
|
+
# (LINEAR16)
|
112
|
+
# * `flac` - The [Free Lossless Audio
|
113
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
114
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
115
|
+
# are supported. (FLAC)
|
116
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
117
|
+
# G.711 PCMU/mu-law. (MULAW)
|
118
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
119
|
+
# be 8000 Hz.) (AMR)
|
120
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
121
|
+
# be 16000 Hz.) (AMR_WB)
|
122
|
+
#
|
123
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
124
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
125
|
+
# For best results, set the sampling rate of the audio source to 16000
|
126
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
127
|
+
# source (instead of re-sampling). Optional.
|
128
|
+
# @param [String] language The language of the supplied audio as a
|
129
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
130
|
+
# code. If not specified, the language defaults to "en-US". See
|
131
|
+
# [Language
|
132
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
133
|
+
# for a list of the currently supported language codes. Optional.
|
134
|
+
#
|
135
|
+
# @return [Audio] The audio file to be recognized.
|
136
|
+
#
|
137
|
+
# @example
|
138
|
+
# require "google/cloud"
|
139
|
+
#
|
140
|
+
# gcloud = Google::Cloud.new
|
141
|
+
# speech = gcloud.speech
|
142
|
+
#
|
143
|
+
# audio = speech.audio "path/to/audio.raw",
|
144
|
+
# encoding: :raw, sample_rate: 16000
|
145
|
+
#
|
146
|
+
# @example With a Google Cloud Storage URI:
|
147
|
+
# require "google/cloud"
|
148
|
+
#
|
149
|
+
# gcloud = Google::Cloud.new
|
150
|
+
# speech = gcloud.speech
|
151
|
+
#
|
152
|
+
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
153
|
+
# encoding: :raw, sample_rate: 16000
|
154
|
+
#
|
155
|
+
# @example With a Google Cloud Storage File object:
|
156
|
+
# require "google/cloud"
|
157
|
+
#
|
158
|
+
# gcloud = Google::Cloud.new
|
159
|
+
# storage = gcloud.storage
|
160
|
+
#
|
161
|
+
# bucket = storage.bucket "bucket-name"
|
162
|
+
# file = bucket.file "path/to/audio.raw"
|
163
|
+
#
|
164
|
+
# speech = gcloud.speech
|
165
|
+
#
|
166
|
+
# audio = speech.audio file, encoding: :raw, sample_rate: 16000
|
167
|
+
#
|
168
|
+
def audio source, encoding: nil, sample_rate: nil, language: nil
|
169
|
+
if source.is_a? Audio
|
170
|
+
audio = source.dup
|
171
|
+
else
|
172
|
+
audio = Audio.from_source source, self
|
173
|
+
end
|
174
|
+
audio.encoding = encoding unless encoding.nil?
|
175
|
+
audio.sample_rate = sample_rate unless sample_rate.nil?
|
176
|
+
audio.language = language unless language.nil?
|
177
|
+
audio
|
178
|
+
end
|
179
|
+
|
180
|
+
##
|
181
|
+
# Performs synchronous speech recognition. Sends audio data to the
|
182
|
+
# Speech API, which performs recognition on that data, and returns
|
183
|
+
# results only after all audio has been processed. Limited to audio data
|
184
|
+
# of 1 minute or less in duration.
|
185
|
+
#
|
186
|
+
# The Speech API will take roughly the same amount of time to process
|
187
|
+
# audio data sent synchronously as the duration of the supplied audio
|
188
|
+
# data. That is, if you send audio data of 30 seconds in length, expect
|
189
|
+
# the synchronous request to take approximately 30 seconds to return
|
190
|
+
# results.
|
191
|
+
#
|
192
|
+
# @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
|
193
|
+
# Synchronous Speech API Recognition
|
194
|
+
# @see https://cloud.google.com/speech/docs/basics#phrase-hints
|
195
|
+
# Phrase Hints
|
196
|
+
#
|
197
|
+
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
198
|
+
# the path to the audio file to be recognized, or a File or other IO
|
199
|
+
# object of the audio contents, or a Cloud Storage URI of the form
|
200
|
+
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
201
|
+
# Google::Cloud::Storage::File of the audio to be recognized.
|
202
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
203
|
+
# recognized. Optional.
|
204
|
+
#
|
205
|
+
# Acceptable values are:
|
206
|
+
#
|
207
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
208
|
+
# (LINEAR16)
|
209
|
+
# * `flac` - The [Free Lossless Audio
|
210
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
211
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
212
|
+
# are supported. (FLAC)
|
213
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
214
|
+
# G.711 PCMU/mu-law. (MULAW)
|
215
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
216
|
+
# be 8000 Hz.) (AMR)
|
217
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
218
|
+
# be 16000 Hz.) (AMR_WB)
|
219
|
+
#
|
220
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
221
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
222
|
+
# For best results, set the sampling rate of the audio source to 16000
|
223
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
224
|
+
# source (instead of re-sampling). Optional.
|
225
|
+
# @param [String] language The language of the supplied audio as a
|
226
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
227
|
+
# code. If not specified, the language defaults to "en-US". See
|
228
|
+
# [Language
|
229
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
230
|
+
# for a list of the currently supported language codes. Optional.
|
231
|
+
# @param [Integer] max_alternatives The maximum number of recognition
|
232
|
+
# hypotheses to be returned. Default is 1. The service may return
|
233
|
+
# fewer. Valid values are 0-30. Optional.
|
234
|
+
# @param [Boolean] profanity_filter When `true`, the service will
|
235
|
+
# attempt to filter out profanities, replacing all but the initial
|
236
|
+
# character in each filtered word with asterisks, e.g. "f***". Default
|
237
|
+
# is `false`.
|
238
|
+
# @param [Array<String>] phrases A list of strings containing words and
|
239
|
+
# phrases "hints" so that the speech recognition is more likely to
|
240
|
+
# recognize them. See [usage
|
241
|
+
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
242
|
+
#
|
243
|
+
# @return [Array<Result>] The transcribed text of audio recognized.
|
244
|
+
#
|
245
|
+
# @example
|
246
|
+
# require "google/cloud"
|
247
|
+
#
|
248
|
+
# gcloud = Google::Cloud.new
|
249
|
+
# speech = gcloud.speech
|
250
|
+
#
|
251
|
+
# results = speech.recognize "path/to/audio.raw",
|
252
|
+
# encoding: :raw, sample_rate: 16000
|
253
|
+
#
|
254
|
+
# @example With a Google Cloud Storage URI:
|
255
|
+
# require "google/cloud"
|
256
|
+
#
|
257
|
+
# gcloud = Google::Cloud.new
|
258
|
+
# speech = gcloud.speech
|
259
|
+
#
|
260
|
+
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
261
|
+
# encoding: :raw, sample_rate: 16000
|
262
|
+
#
|
263
|
+
# @example With a Google Cloud Storage File object:
|
264
|
+
# require "google/cloud"
|
265
|
+
#
|
266
|
+
# gcloud = Google::Cloud.new
|
267
|
+
# storage = gcloud.storage
|
268
|
+
#
|
269
|
+
# bucket = storage.bucket "bucket-name"
|
270
|
+
# file = bucket.file "path/to/audio.raw"
|
271
|
+
#
|
272
|
+
# speech = gcloud.speech
|
273
|
+
#
|
274
|
+
# results = speech.recognize file, encoding: :raw,
|
275
|
+
# sample_rate: 16000,
|
276
|
+
# max_alternatives: 10
|
277
|
+
#
|
278
|
+
def recognize source, encoding: nil, sample_rate: nil, language: nil,
|
279
|
+
max_alternatives: nil, profanity_filter: nil, phrases: nil
|
280
|
+
ensure_service!
|
281
|
+
|
282
|
+
audio_obj = audio source, encoding: encoding,
|
283
|
+
sample_rate: sample_rate, language: language
|
284
|
+
|
285
|
+
config = audio_config(
|
286
|
+
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
287
|
+
language: audio_obj.language, max_alternatives: max_alternatives,
|
288
|
+
profanity_filter: profanity_filter, phrases: phrases)
|
289
|
+
|
290
|
+
grpc = service.recognize_sync audio_obj.to_grpc, config
|
291
|
+
grpc.results.map do |result_grpc|
|
292
|
+
Result.from_grpc result_grpc
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
##
|
297
|
+
# Performs asynchronous speech recognition. Requests are processed
|
298
|
+
# asynchronously, meaning a Job is returned once the audio data has been
|
299
|
+
# sent, and can be refreshed to retrieve recognition results once the
|
300
|
+
# audio data has been processed.
|
301
|
+
#
|
302
|
+
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
303
|
+
# Asynchronous Speech API Responses
|
304
|
+
#
|
305
|
+
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
306
|
+
# the path to the audio file to be recognized, or a File or other IO
|
307
|
+
# object of the audio contents, or a Cloud Storage URI of the form
|
308
|
+
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
309
|
+
# Google::Cloud::Storage::File of the audio to be recognized.
|
310
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
311
|
+
# recognized. Optional.
|
312
|
+
#
|
313
|
+
# Currently, the only acceptable value is:
|
314
|
+
#
|
315
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
316
|
+
# (LINEAR16)
|
317
|
+
#
|
318
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
319
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
320
|
+
# For best results, set the sampling rate of the audio source to 16000
|
321
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
322
|
+
# source (instead of re-sampling). Optional.
|
323
|
+
# @param [String] language The language of the supplied audio as a
|
324
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
325
|
+
# code. If not specified, the language defaults to "en-US". See
|
326
|
+
# [Language
|
327
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
328
|
+
# for a list of the currently supported language codes. Optional.
|
329
|
+
# @param [Integer] max_alternatives The maximum number of recognition
|
330
|
+
# hypotheses to be returned. Default is 1. The service may return
|
331
|
+
# fewer. Valid values are 0-30. Optional.
|
332
|
+
# @param [Boolean] profanity_filter When `true`, the service will
|
333
|
+
# attempt to filter out profanities, replacing all but the initial
|
334
|
+
# character in each filtered word with asterisks, e.g. "f***". Default
|
335
|
+
# is `false`.
|
336
|
+
# @param [Array<String>] phrases A list of strings containing words and
|
337
|
+
# phrases "hints" so that the speech recognition is more likely to
|
338
|
+
# recognize them. See [usage
|
339
|
+
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
340
|
+
#
|
341
|
+
# @return [Job] A resource representing the long-running, asynchronous
|
342
|
+
# processing of a speech-recognition operation.
|
343
|
+
#
|
344
|
+
# @example
|
345
|
+
# require "google/cloud"
|
346
|
+
#
|
347
|
+
# gcloud = Google::Cloud.new
|
348
|
+
# speech = gcloud.speech
|
349
|
+
#
|
350
|
+
# job = speech.recognize_job "path/to/audio.raw",
|
351
|
+
# encoding: :raw, sample_rate: 16000
|
352
|
+
#
|
353
|
+
# job.done? #=> false
|
354
|
+
# job.reload!
|
355
|
+
#
|
356
|
+
# @example With a Google Cloud Storage URI:
|
357
|
+
# require "google/cloud"
|
358
|
+
#
|
359
|
+
# gcloud = Google::Cloud.new
|
360
|
+
# speech = gcloud.speech
|
361
|
+
#
|
362
|
+
# job = speech.recognize_job "gs://bucket-name/path/to/audio.raw",
|
363
|
+
# encoding: :raw, sample_rate: 16000
|
364
|
+
#
|
365
|
+
# job.done? #=> false
|
366
|
+
# job.reload!
|
367
|
+
#
|
368
|
+
# @example With a Google Cloud Storage File object:
|
369
|
+
# require "google/cloud"
|
370
|
+
#
|
371
|
+
# gcloud = Google::Cloud.new
|
372
|
+
# storage = gcloud.storage
|
373
|
+
#
|
374
|
+
# bucket = storage.bucket "bucket-name"
|
375
|
+
# file = bucket.file "path/to/audio.raw"
|
376
|
+
#
|
377
|
+
# speech = gcloud.speech
|
378
|
+
#
|
379
|
+
# job = speech.recognize_job file, encoding: :raw,
|
380
|
+
# sample_rate: 16000,
|
381
|
+
# max_alternatives: 10
|
382
|
+
#
|
383
|
+
# job.done? #=> false
|
384
|
+
# job.reload!
|
385
|
+
#
|
386
|
+
def recognize_job source, encoding: nil, sample_rate: nil,
|
387
|
+
language: nil, max_alternatives: nil,
|
388
|
+
profanity_filter: nil, phrases: nil
|
389
|
+
ensure_service!
|
390
|
+
|
391
|
+
audio_obj = audio source, encoding: encoding,
|
392
|
+
sample_rate: sample_rate, language: language
|
393
|
+
|
394
|
+
config = audio_config(
|
395
|
+
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
396
|
+
language: audio_obj.language, max_alternatives: max_alternatives,
|
397
|
+
profanity_filter: profanity_filter, phrases: phrases)
|
398
|
+
|
399
|
+
grpc = service.recognize_async audio_obj.to_grpc, config
|
400
|
+
Job.from_grpc grpc, service
|
401
|
+
end
|
402
|
+
|
403
|
+
protected
|
404
|
+
|
405
|
+
def audio_config encoding: nil, sample_rate: nil, language: nil,
|
406
|
+
max_alternatives: nil, profanity_filter: nil,
|
407
|
+
phrases: nil
|
408
|
+
context = nil
|
409
|
+
context = V1beta1::SpeechContext.new(phrases: phrases) if phrases
|
410
|
+
V1beta1::RecognitionConfig.new({
|
411
|
+
encoding: convert_encoding(encoding),
|
412
|
+
sample_rate: sample_rate,
|
413
|
+
language_code: language,
|
414
|
+
max_alternatives: max_alternatives,
|
415
|
+
profanity_filter: profanity_filter,
|
416
|
+
speech_context: context
|
417
|
+
}.delete_if { |_, v| v.nil? })
|
418
|
+
end
|
419
|
+
|
420
|
+
def convert_encoding encoding
|
421
|
+
mapping = { raw: :LINEAR16, linear: :LINEAR16, linear16: :LINEAR16,
|
422
|
+
flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB }
|
423
|
+
mapping[encoding] || encoding
|
424
|
+
end
|
425
|
+
|
426
|
+
##
|
427
|
+
# @private Raise an error unless an active connection to the service is
|
428
|
+
# available.
|
429
|
+
def ensure_service!
|
430
|
+
fail "Must have active connection to service" unless service
|
431
|
+
end
|
432
|
+
end
|
433
|
+
end
|
434
|
+
end
|
435
|
+
end
|