google-cloud-speech 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,32 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/credentials"
17
+ require "google/cloud/speech/v1beta1"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Speech
22
##
# @private Represents the OAuth 2.0 signing logic for Speech.
class Credentials < Google::Cloud::Credentials
  # OAuth scopes, taken from the generated V1beta1 Speech API client.
  SCOPE = V1beta1::SpeechApi::ALL_SCOPES

  # Names of environment variables holding a keyfile path, listed from the
  # Speech-specific name to the generic ones.
  # NOTE(review): presumably consulted in this order by
  # Google::Cloud::Credentials - confirm against the parent class.
  PATH_ENV_VARS = %w(SPEECH_KEYFILE GOOGLE_CLOUD_KEYFILE
                     GCLOUD_KEYFILE).freeze

  # Names of environment variables holding keyfile JSON contents, listed
  # from the Speech-specific name to the generic ones.
  JSON_ENV_VARS = %w(SPEECH_KEYFILE_JSON GOOGLE_CLOUD_KEYFILE_JSON
                     GCLOUD_KEYFILE_JSON).freeze
end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,178 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/speech/v1beta1"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Speech
21
##
# # Job
#
# A resource that represents the long-running, asynchronous processing of a
# speech-recognition operation. The job can be refreshed to retrieve
# recognition results once the audio data has been processed.
#
# See {Project#recognize_job} and {Audio#recognize_job}.
#
# @see https://cloud.google.com/speech/docs/basics#async-responses
#   Asynchronous Speech API Responses
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
#   Long-running Operation
#
# @example
#   require "google/cloud"
#
#   gcloud = Google::Cloud.new
#   speech = gcloud.speech
#
#   job = speech.recognize_job "path/to/audio.raw",
#                              encoding: :raw, sample_rate: 16000
#
#   job.done? #=> false
#   job.reload! # API call
#   job.done? #=> true
#   results = job.results
#
class Job
  ##
  # @private The Google::Longrunning::Operation gRPC object.
  attr_accessor :grpc

  ##
  # @private The gRPC Service object.
  attr_accessor :service

  ##
  # @private Creates a new Job instance.
  def initialize
    @grpc = nil
    @service = nil
  end

  ##
  # The speech recognition results, each corresponding to a portion of the
  # audio.
  #
  # @return [Array<Result>, nil] The transcribed text of audio recognized.
  #   Returns `nil` if the job is not done, or if the finished operation
  #   does not carry a response.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> true
  #   results = job.results
  #
  def results
    return nil unless done?
    # The operation's result is either a response or an error; only a
    # response can be decoded into recognition results.
    return nil unless @grpc.result == :response
    resp = V1beta1::AsyncRecognizeResponse.decode(@grpc.response.value)
    resp.results.map { |result_grpc| Result.from_grpc result_grpc }
    # TODO: Ensure we are raising the proper error
    # TODO: Ensure GRPC behavior here, is an error already raised?
    # raise @grpc.error
  end

  ##
  # Checks if the speech-recognition processing of the audio data is
  # complete.
  #
  # @return [Boolean] `true` when complete, `false` otherwise.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #
  def done?
    @grpc.done
  end

  ##
  # Reloads the job with current data from the long-running, asynchronous
  # processing of a speech-recognition operation.
  #
  # @return [Job] The job itself, so calls can be chained.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #   job.reload! # API call
  #   job.done? #=> true
  #
  def reload!
    @grpc = @service.get_op @grpc.name
    self
  end
  alias_method :refresh!, :reload!

  ##
  # Reloads the job until the operation is complete. The delay between
  # reloads will incrementally increase: the first wait is 5 seconds and
  # each subsequent wait is 2 seconds longer than the previous one.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #   job.wait_until_done!
  #   job.done? #=> true
  #
  def wait_until_done!
    backoff = ->(retries) { sleep 2 * retries + 5 }
    retries = 0
    until done?
      backoff.call retries
      retries += 1
      reload!
    end
  end

  ##
  # @private New Job from a Google::Longrunning::Operation object and the
  # service used to refresh it.
  def self.from_grpc grpc, service
    new.tap do |job|
      job.grpc = grpc
      job.service = service
    end
  end
end
176
+ end
177
+ end
178
+ end
@@ -0,0 +1,435 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/core/gce"
18
+ require "google/cloud/speech/service"
19
+ require "google/cloud/speech/audio"
20
+ require "google/cloud/speech/result"
21
+ require "google/cloud/speech/job"
22
+
23
+ module Google
24
+ module Cloud
25
+ module Speech
26
##
# # Project
#
# The Google Cloud Speech API enables developers to convert audio to text
# by applying powerful neural network models. The API recognizes over 80
# languages and variants, to support your global user base. You can
# transcribe the text of users dictating to an application's microphone,
# enable command-and-control through voice, or transcribe audio files,
# among many other use cases. Recognize audio uploaded in the request, and
# integrate with your audio storage on Google Cloud Storage, by using the
# same technology Google uses to power its own products.
#
# See {Google::Cloud#speech}
#
# @example
#   require "google/cloud"
#
#   gcloud = Google::Cloud.new
#   speech = gcloud.speech
#
#   audio = speech.audio "path/to/audio.raw",
#                        encoding: :raw, sample_rate: 16000
#   results = audio.recognize
#
#   result = results.first
#   result.transcript #=> "how old is the Brooklyn Bridge"
#   result.confidence #=> 88.15
#
class Project
  # Friendly encoding names mapped to the enum symbols passed to
  # V1beta1::RecognitionConfig.
  ENCODING_ALIASES = {
    raw: :LINEAR16, linear: :LINEAR16, linear16: :LINEAR16,
    flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB
  }.freeze
  private_constant :ENCODING_ALIASES

  ##
  # @private The gRPC Service object.
  attr_accessor :service

  ##
  # @private Creates a new Speech Project instance.
  def initialize service
    @service = service
  end

  ##
  # The Speech project connected to.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new "my-project-id",
  #                              "/path/to/keyfile.json"
  #   speech = gcloud.speech
  #
  #   speech.project #=> "my-project-id"
  #
  def project
    service.project
  end

  ##
  # @private Default project. The first value found among the
  # SPEECH_PROJECT, GOOGLE_CLOUD_PROJECT and GCLOUD_PROJECT environment
  # variables, falling back to Google::Cloud::Core::GCE.project_id.
  def self.default_project
    ENV["SPEECH_PROJECT"] ||
      ENV["GOOGLE_CLOUD_PROJECT"] ||
      ENV["GCLOUD_PROJECT"] ||
      Google::Cloud::Core::GCE.project_id
  end

  ##
  # Returns a new Audio instance from the given source. No API call is
  # made.
  #
  # @see https://cloud.google.com/speech/docs/basics#audio-encodings
  #   Audio Encodings
  # @see https://cloud.google.com/speech/docs/basics#sample-rates
  #   Sample Rates
  # @see https://cloud.google.com/speech/docs/basics#languages
  #   Languages
  #
  # @param [String, IO, Google::Cloud::Storage::File] source A string of
  #   the path to the audio file to be recognized, or a File or other IO
  #   object of the audio contents, or a Cloud Storage URI of the form
  #   `"gs://bucketname/path/to/document.ext"`; or an instance of
  #   Google::Cloud::Storage::File containing the audio to be recognized.
  #   An existing Audio instance is also accepted; it is duplicated, so
  #   the options below are applied to the copy.
  # @param [String, Symbol] encoding Encoding of audio data to be
  #   recognized. Optional.
  #
  #   Acceptable values are:
  #
  #   * `raw` - Uncompressed 16-bit signed little-endian samples.
  #     (LINEAR16)
  #   * `flac` - The [Free Lossless Audio
  #     Codec](http://flac.sourceforge.net/documentation.html) encoding.
  #     Only 16-bit samples are supported. Not all fields in STREAMINFO
  #     are supported. (FLAC)
  #   * `mulaw` - 8-bit samples that compand 14-bit audio samples using
  #     G.711 PCMU/mu-law. (MULAW)
  #   * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
  #     be 8000 Hz.) (AMR)
  #   * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
  #     be 16000 Hz.) (AMR_WB)
  #
  # @param [Integer] sample_rate Sample rate in Hertz of the audio data
  #   to be recognized. Valid values are: 8000-48000. 16000 is optimal.
  #   For best results, set the sampling rate of the audio source to 16000
  #   Hz. If that's not possible, use the native sample rate of the audio
  #   source (instead of re-sampling). Optional.
  # @param [String] language The language of the supplied audio as a
  #   [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
  #   code. If not specified, the language defaults to "en-US". See
  #   [Language
  #   Support](https://cloud.google.com/speech/docs/best-practices#language_support)
  #   for a list of the currently supported language codes. Optional.
  #
  # @return [Audio] The audio file to be recognized.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   audio = speech.audio "path/to/audio.raw",
  #                        encoding: :raw, sample_rate: 16000
  #
  # @example With a Google Cloud Storage URI:
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   audio = speech.audio "gs://bucket-name/path/to/audio.raw",
  #                        encoding: :raw, sample_rate: 16000
  #
  # @example With a Google Cloud Storage File object:
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   storage = gcloud.storage
  #
  #   bucket = storage.bucket "bucket-name"
  #   file = bucket.file "path/to/audio.raw"
  #
  #   speech = gcloud.speech
  #
  #   audio = speech.audio file, encoding: :raw, sample_rate: 16000
  #
  def audio source, encoding: nil, sample_rate: nil, language: nil
    # Duplicate an existing Audio so the caller's instance is not mutated
    # by the option overrides below.
    audio_obj = if source.is_a? Audio
                  source.dup
                else
                  Audio.from_source source, self
                end
    audio_obj.encoding = encoding unless encoding.nil?
    audio_obj.sample_rate = sample_rate unless sample_rate.nil?
    audio_obj.language = language unless language.nil?
    audio_obj
  end

  ##
  # Performs synchronous speech recognition. Sends audio data to the
  # Speech API, which performs recognition on that data, and returns
  # results only after all audio has been processed. Limited to audio data
  # of 1 minute or less in duration.
  #
  # The Speech API will take roughly the same amount of time to process
  # audio data sent synchronously as the duration of the supplied audio
  # data. That is, if you send audio data of 30 seconds in length, expect
  # the synchronous request to take approximately 30 seconds to return
  # results.
  #
  # @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
  #   Synchronous Speech API Recognition
  # @see https://cloud.google.com/speech/docs/basics#phrase-hints
  #   Phrase Hints
  #
  # @param [String, IO, Google::Cloud::Storage::File] source A string of
  #   the path to the audio file to be recognized, or a File or other IO
  #   object of the audio contents, or a Cloud Storage URI of the form
  #   `"gs://bucketname/path/to/document.ext"`; or an instance of
  #   Google::Cloud::Storage::File containing the audio to be recognized.
  # @param [String, Symbol] encoding Encoding of audio data to be
  #   recognized. Optional.
  #
  #   Acceptable values are:
  #
  #   * `raw` - Uncompressed 16-bit signed little-endian samples.
  #     (LINEAR16)
  #   * `flac` - The [Free Lossless Audio
  #     Codec](http://flac.sourceforge.net/documentation.html) encoding.
  #     Only 16-bit samples are supported. Not all fields in STREAMINFO
  #     are supported. (FLAC)
  #   * `mulaw` - 8-bit samples that compand 14-bit audio samples using
  #     G.711 PCMU/mu-law. (MULAW)
  #   * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
  #     be 8000 Hz.) (AMR)
  #   * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
  #     be 16000 Hz.) (AMR_WB)
  #
  # @param [Integer] sample_rate Sample rate in Hertz of the audio data
  #   to be recognized. Valid values are: 8000-48000. 16000 is optimal.
  #   For best results, set the sampling rate of the audio source to 16000
  #   Hz. If that's not possible, use the native sample rate of the audio
  #   source (instead of re-sampling). Optional.
  # @param [String] language The language of the supplied audio as a
  #   [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
  #   code. If not specified, the language defaults to "en-US". See
  #   [Language
  #   Support](https://cloud.google.com/speech/docs/best-practices#language_support)
  #   for a list of the currently supported language codes. Optional.
  # @param [Integer] max_alternatives The maximum number of recognition
  #   hypotheses to be returned. The service may return fewer. Valid
  #   values are 0-30. Defaults to 1. Optional.
  # @param [Boolean] profanity_filter When `true`, the service will
  #   attempt to filter out profanities, replacing all but the initial
  #   character in each filtered word with asterisks, e.g. "f***". Default
  #   is `false`.
  # @param [Array<String>] phrases A list of strings containing words and
  #   phrases "hints" so that the speech recognition is more likely to
  #   recognize them. See [usage
  #   limits](https://cloud.google.com/speech/limits#content). Optional.
  #
  # @return [Array<Result>] The transcribed text of audio recognized.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   results = speech.recognize "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  # @example With a Google Cloud Storage URI:
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   results = speech.recognize "gs://bucket-name/path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  # @example With a Google Cloud Storage File object:
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   storage = gcloud.storage
  #
  #   bucket = storage.bucket "bucket-name"
  #   file = bucket.file "path/to/audio.raw"
  #
  #   speech = gcloud.speech
  #
  #   results = speech.recognize file, encoding: :raw,
  #                              sample_rate: 16000,
  #                              max_alternatives: 10
  #
  def recognize source, encoding: nil, sample_rate: nil, language: nil,
                max_alternatives: nil, profanity_filter: nil, phrases: nil
    ensure_service!

    audio_obj = audio source, encoding: encoding,
                              sample_rate: sample_rate, language: language

    # Read the settings back from the Audio object so values already set
    # on a passed-in Audio are honored.
    config = audio_config(
      encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
      language: audio_obj.language, max_alternatives: max_alternatives,
      profanity_filter: profanity_filter, phrases: phrases)

    grpc = service.recognize_sync audio_obj.to_grpc, config
    grpc.results.map { |result_grpc| Result.from_grpc result_grpc }
  end

  ##
  # Performs asynchronous speech recognition. Requests are processed
  # asynchronously, meaning a Job is returned once the audio data has been
  # sent, and can be refreshed to retrieve recognition results once the
  # audio data has been processed.
  #
  # @see https://cloud.google.com/speech/docs/basics#async-responses
  #   Asynchronous Speech API Responses
  #
  # @param [String, IO, Google::Cloud::Storage::File] source A string of
  #   the path to the audio file to be recognized, or a File or other IO
  #   object of the audio contents, or a Cloud Storage URI of the form
  #   `"gs://bucketname/path/to/document.ext"`; or an instance of
  #   Google::Cloud::Storage::File containing the audio to be recognized.
  # @param [String, Symbol] encoding Encoding of audio data to be
  #   recognized. Optional.
  #
  #   Currently, the only acceptable value is:
  #
  #   * `raw` - Uncompressed 16-bit signed little-endian samples.
  #     (LINEAR16)
  #
  # @param [Integer] sample_rate Sample rate in Hertz of the audio data
  #   to be recognized. Valid values are: 8000-48000. 16000 is optimal.
  #   For best results, set the sampling rate of the audio source to 16000
  #   Hz. If that's not possible, use the native sample rate of the audio
  #   source (instead of re-sampling). Optional.
  # @param [String] language The language of the supplied audio as a
  #   [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
  #   code. If not specified, the language defaults to "en-US". See
  #   [Language
  #   Support](https://cloud.google.com/speech/docs/best-practices#language_support)
  #   for a list of the currently supported language codes. Optional.
  # @param [Integer] max_alternatives The maximum number of recognition
  #   hypotheses to be returned. The service may return fewer. Valid
  #   values are 0-30. Defaults to 1. Optional.
  # @param [Boolean] profanity_filter When `true`, the service will
  #   attempt to filter out profanities, replacing all but the initial
  #   character in each filtered word with asterisks, e.g. "f***". Default
  #   is `false`.
  # @param [Array<String>] phrases A list of strings containing words and
  #   phrases "hints" so that the speech recognition is more likely to
  #   recognize them. See [usage
  #   limits](https://cloud.google.com/speech/limits#content). Optional.
  #
  # @return [Job] A resource that represents the long-running,
  #   asynchronous processing of a speech-recognition operation.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #   job.reload!
  #
  # @example With a Google Cloud Storage URI:
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "gs://bucket-name/path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #   job.reload!
  #
  # @example With a Google Cloud Storage File object:
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   storage = gcloud.storage
  #
  #   bucket = storage.bucket "bucket-name"
  #   file = bucket.file "path/to/audio.raw"
  #
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job file, encoding: :raw,
  #                              sample_rate: 16000,
  #                              max_alternatives: 10
  #
  #   job.done? #=> false
  #   job.reload!
  #
  def recognize_job source, encoding: nil, sample_rate: nil,
                    language: nil, max_alternatives: nil,
                    profanity_filter: nil, phrases: nil
    ensure_service!

    audio_obj = audio source, encoding: encoding,
                              sample_rate: sample_rate, language: language

    # Read the settings back from the Audio object so values already set
    # on a passed-in Audio are honored.
    config = audio_config(
      encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
      language: audio_obj.language, max_alternatives: max_alternatives,
      profanity_filter: profanity_filter, phrases: phrases)

    grpc = service.recognize_async audio_obj.to_grpc, config
    Job.from_grpc grpc, service
  end

  protected

  ##
  # @private Builds the V1beta1::RecognitionConfig for a request. Options
  # left as `nil` are omitted from the config entirely. A single phrase
  # given as a String is wrapped in an Array.
  def audio_config encoding: nil, sample_rate: nil, language: nil,
                   max_alternatives: nil, profanity_filter: nil,
                   phrases: nil
    context = nil
    if phrases
      context = V1beta1::SpeechContext.new(phrases: Array(phrases))
    end
    options = {
      encoding: convert_encoding(encoding),
      sample_rate: sample_rate,
      language_code: language,
      max_alternatives: max_alternatives,
      profanity_filter: profanity_filter,
      speech_context: context
    }
    V1beta1::RecognitionConfig.new(options.reject { |_, v| v.nil? })
  end

  ##
  # @private Translates a friendly encoding name (`:raw`, `"flac"`, ...)
  # into the matching enum symbol. Strings and symbols are both accepted
  # and matched case-insensitively; any value without a known alias,
  # including `nil`, is returned unchanged.
  def convert_encoding encoding
    ENCODING_ALIASES.fetch encoding.to_s.downcase.to_sym, encoding
  end

  ##
  # @private Raise an error unless an active connection to the service is
  # available.
  def ensure_service!
    raise "Must have active connection to service" unless service
  end
end
433
+ end
434
+ end
435
+ end