google-cloud-speech 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/credentials"
17
+ require "google/cloud/speech/v1beta1"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Speech
22
+ ##
23
+ # @private Represents the OAuth 2.0 signing logic for Speech.
24
class Credentials < Google::Cloud::Credentials
  # OAuth 2.0 scopes, taken verbatim from the v1beta1 Speech API client.
  SCOPE = V1beta1::SpeechApi::ALL_SCOPES

  # Names of environment variables for a keyfile path.
  # NOTE(review): presumably read by the base class in this order -- confirm.
  PATH_ENV_VARS = [
    "SPEECH_KEYFILE",
    "GOOGLE_CLOUD_KEYFILE",
    "GCLOUD_KEYFILE"
  ]

  # Names of environment variables for inline keyfile JSON.
  # NOTE(review): presumably read by the base class in this order -- confirm.
  JSON_ENV_VARS = [
    "SPEECH_KEYFILE_JSON",
    "GOOGLE_CLOUD_KEYFILE_JSON",
    "GCLOUD_KEYFILE_JSON"
  ]
end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,178 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/speech/v1beta1"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Speech
21
+ ##
22
+ # # Job
23
+ #
24
+ # A resource that represents the long-running, asynchronous processing of a
25
+ # speech-recognition operation. The job can be refreshed to retrieve
26
+ # recognition results once the audio data has been processed.
27
+ #
28
+ # See {Project#recognize_job} and {Audio#recognize_job}.
29
+ #
30
+ # @see https://cloud.google.com/speech/docs/basics#async-responses
31
+ # Asynchronous Speech API Responses
32
+ # @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
33
+ # Long-running Operation
34
+ #
35
+ # @example
36
+ # require "google/cloud"
37
+ #
38
+ # gcloud = Google::Cloud.new
39
+ # speech = gcloud.speech
40
+ #
41
+ # job = speech.recognize_job "path/to/audio.raw",
42
+ # encoding: :raw, sample_rate: 16000
43
+ #
44
+ # job.done? #=> false
45
+ # job.reload! # API call
46
+ # job.done? #=> true
47
+ # results = job.results
48
+ #
49
class Job
  ##
  # @private The Google::Longrunning::Operation gRPC object.
  attr_accessor :grpc

  ##
  # @private The gRPC Service object.
  attr_accessor :service

  ##
  # @private Creates a new Job instance with no operation and no service
  # attached. Use {Job.from_grpc} to build a usable instance.
  def initialize
    @grpc = nil
    @service = nil
  end

  ##
  # The speech recognition results, each corresponding to a portion of
  # the audio.
  #
  # @return [Array<Result>, nil] The transcribed text of audio recognized.
  #   Returns `nil` if the job is not done, or if the finished operation
  #   does not carry a response.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> true
  #   results = job.results
  #
  def results
    return nil unless done?
    # TODO: Ensure we are raising the proper error
    # TODO: Ensure GRPC behavior here, is an error already raised?
    # raise @grpc.error
    return nil unless @grpc.result == :response

    # The response payload is packed; decode it into the async response
    # message before wrapping each entry.
    resp = V1beta1::AsyncRecognizeResponse.decode(@grpc.response.value)
    resp.results.map { |result_grpc| Result.from_grpc result_grpc }
  end

  ##
  # Checks if the speech-recognition processing of the audio data is
  # complete.
  #
  # @return [boolean] `true` when complete, `false` otherwise.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #
  def done?
    @grpc.done
  end

  ##
  # Reloads the job with current data from the long-running, asynchronous
  # processing of a speech-recognition operation.
  #
  # @return [Job] The same job, now holding the freshly fetched operation.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #   job.reload! # API call
  #   job.done? #=> true
  #
  def reload!
    # Look the operation up again by name and replace the cached copy.
    @grpc = @service.get_op @grpc.name
    self
  end
  alias_method :refresh!, :reload!

  ##
  # Reloads the job until the operation is complete. The delay between
  # reloads will incrementally increase.
  #
  # @return [Job] The same job, once {#done?} is `true`.
  #
  # @example
  #   require "google/cloud"
  #
  #   gcloud = Google::Cloud.new
  #   speech = gcloud.speech
  #
  #   job = speech.recognize_job "path/to/audio.raw",
  #                              encoding: :raw, sample_rate: 16000
  #
  #   job.done? #=> false
  #   job.wait_until_done!
  #   job.done? #=> true
  #
  def wait_until_done!
    retries = 0
    until done?
      # Linear backoff: 5s before the first reload, then 7s, 9s, ...
      sleep 2 * retries + 5
      retries += 1
      reload!
    end
    # Return the job itself, like #reload!, so calls can be chained.
    self
  end

  ##
  # @private New Speech::Job from a Google::Longrunning::Operation
  # object and the service used to refresh it.
  def self.from_grpc grpc, service
    new.tap do |job|
      job.grpc = grpc
      job.service = service
    end
  end
end
176
+ end
177
+ end
178
+ end
@@ -0,0 +1,435 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/core/gce"
18
+ require "google/cloud/speech/service"
19
+ require "google/cloud/speech/audio"
20
+ require "google/cloud/speech/result"
21
+ require "google/cloud/speech/job"
22
+
23
+ module Google
24
+ module Cloud
25
+ module Speech
26
+ ##
27
+ # # Project
28
+ #
29
+ # The Google Cloud Speech API enables developers to convert audio to text
30
+ # by applying powerful neural network models. The API recognizes over 80
31
+ # languages and variants, to support your global user base. You can
32
+ # transcribe the text of users dictating to an application's microphone,
33
+ # enable command-and-control through voice, or transcribe audio files,
34
+ # among many other use cases. Recognize audio uploaded in the request, and
35
+ # integrate with your audio storage on Google Cloud Storage, by using the
36
+ # same technology Google uses to power its own products.
37
+ #
38
+ # See {Google::Cloud#speech}
39
+ #
40
+ # @example
41
+ # require "google/cloud"
42
+ #
43
+ # gcloud = Google::Cloud.new
44
+ # speech = gcloud.speech
45
+ #
46
+ # audio = speech.audio "path/to/audio.raw",
47
+ # encoding: :raw, sample_rate: 16000
48
+ # results = audio.recognize
49
+ #
50
+ # result = results.first
51
+ # result.transcript #=> "how old is the Brooklyn Bridge"
52
+ # result.confidence #=> 88.15
53
+ #
54
+ class Project
55
+ ##
56
+ # @private The gRPC Service object.
57
+ attr_accessor :service
58
+
59
+ ##
60
+ # @private Creates a new Speech Project instance.
61
def initialize(service)
  # Remember the gRPC service object handed in by the caller.
  @service = service
end
64
+
65
+ # The ID of the project this Speech client is connected to.
66
+ #
67
+ # @example
68
+ # require "google/cloud"
69
+ #
70
+ # gcloud = Google::Cloud.new "my-project-id",
71
+ # "/path/to/keyfile.json"
72
+ # speech = gcloud.speech
73
+ #
74
+ # speech.project #=> "my-project-id"
75
+ #
76
def project
  # The project identifier is owned by the service object; just relay it.
  project_id = service.project
  project_id
end
79
+
80
+ ##
81
+ # @private Default project.
82
def self.default_project
  # Environment variables are consulted in this order; the first one that
  # is set wins.
  from_env = %w(SPEECH_PROJECT GOOGLE_CLOUD_PROJECT GCLOUD_PROJECT)
             .map { |name| ENV[name] }
             .compact
             .first
  # With none of them set, fall back to the project ID reported by GCE.
  from_env || Google::Cloud::Core::GCE.project_id
end
88
+
89
+ ##
90
+ # Returns a new Audio instance from the given source. No API call is
91
+ # made.
92
+ #
93
+ # @see https://cloud.google.com/speech/docs/basics#audio-encodings
94
+ # Audio Encodings
95
+ # @see https://cloud.google.com/speech/docs/basics#sample-rates
96
+ # Sample Rates
97
+ # @see https://cloud.google.com/speech/docs/basics#languages
98
+ # Languages
99
+ #
100
+ # @param [String, IO, Google::Cloud::Storage::File] source A string of
101
+ # the path to the audio file to be recognized, or a File or other IO
102
+ # object of the audio contents, or a Cloud Storage URI of the form
103
+ # `"gs://bucketname/path/to/document.ext"`; or an instance of
104
+ # Google::Cloud::Storage::File of the audio to be recognized.
105
+ # @param [String, Symbol] encoding Encoding of audio data to be
106
+ # recognized. Optional.
107
+ #
108
+ # Acceptable values are:
109
+ #
110
+ # * `raw` - Uncompressed 16-bit signed little-endian samples.
111
+ # (LINEAR16)
112
+ # * `flac` - The [Free Lossless Audio
113
+ # Codec](http://flac.sourceforge.net/documentation.html) encoding.
114
+ # Only 16-bit samples are supported. Not all fields in STREAMINFO
115
+ # are supported. (FLAC)
116
+ # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
117
+ # G.711 PCMU/mu-law. (MULAW)
118
+ # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
119
+ # be 8000 Hz.) (AMR)
120
+ # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
121
+ # be 16000 Hz.) (AMR_WB)
122
+ #
123
+ # @param [Integer] sample_rate Sample rate in Hertz of the audio data
124
+ # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
125
+ # For best results, set the sampling rate of the audio source to 16000
126
+ # Hz. If that's not possible, use the native sample rate of the audio
127
+ # source (instead of re-sampling). Optional.
128
+ # @param [String] language The language of the supplied audio as a
129
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
130
+ # code. If not specified, the language defaults to "en-US". See
131
+ # [Language
132
+ # Support](https://cloud.google.com/speech/docs/best-practices#language_support)
133
+ # for a list of the currently supported language codes. Optional.
134
+ #
135
+ # @return [Audio] The audio file to be recognized.
136
+ #
137
+ # @example
138
+ # require "google/cloud"
139
+ #
140
+ # gcloud = Google::Cloud.new
141
+ # speech = gcloud.speech
142
+ #
143
+ # audio = speech.audio "path/to/audio.raw",
144
+ # encoding: :raw, sample_rate: 16000
145
+ #
146
+ # @example With a Google Cloud Storage URI:
147
+ # require "google/cloud"
148
+ #
149
+ # gcloud = Google::Cloud.new
150
+ # speech = gcloud.speech
151
+ #
152
+ # audio = speech.audio "gs://bucket-name/path/to/audio.raw",
153
+ # encoding: :raw, sample_rate: 16000
154
+ #
155
+ # @example With a Google Cloud Storage File object:
156
+ # require "google/cloud"
157
+ #
158
+ # gcloud = Google::Cloud.new
159
+ # storage = gcloud.storage
160
+ #
161
+ # bucket = storage.bucket "bucket-name"
162
+ # file = bucket.file "path/to/audio.raw"
163
+ #
164
+ # speech = gcloud.speech
165
+ #
166
+ # audio = speech.audio file, encoding: :raw, sample_rate: 16000
167
+ #
168
def audio source, encoding: nil, sample_rate: nil, language: nil
  # Copy an existing Audio so the caller's object is never modified;
  # anything else is handed to Audio.from_source.
  instance = source.is_a?(Audio) ? source.dup : Audio.from_source(source, self)

  # Apply only the overrides that were actually supplied.
  instance.tap do |target|
    target.encoding = encoding unless encoding.nil?
    target.sample_rate = sample_rate unless sample_rate.nil?
    target.language = language unless language.nil?
  end
end
179
+
180
+ ##
181
+ # Performs synchronous speech recognition. Sends audio data to the
182
+ # Speech API, which performs recognition on that data, and returns
183
+ # results only after all audio has been processed. Limited to audio data
184
+ # of 1 minute or less in duration.
185
+ #
186
+ # The Speech API will take roughly the same amount of time to process
187
+ # audio data sent synchronously as the duration of the supplied audio
188
+ # data. That is, if you send audio data of 30 seconds in length, expect
189
+ # the synchronous request to take approximately 30 seconds to return
190
+ # results.
191
+ #
192
+ # @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
193
+ # Synchronous Speech API Recognition
194
+ # @see https://cloud.google.com/speech/docs/basics#phrase-hints
195
+ # Phrase Hints
196
+ #
197
+ # @param [String, IO, Google::Cloud::Storage::File] source A string of
198
+ # the path to the audio file to be recognized, or a File or other IO
199
+ # object of the audio contents, or a Cloud Storage URI of the form
200
+ # `"gs://bucketname/path/to/document.ext"`; or an instance of
201
+ # Google::Cloud::Storage::File of the audio to be recognized.
202
+ # @param [String, Symbol] encoding Encoding of audio data to be
203
+ # recognized. Optional.
204
+ #
205
+ # Acceptable values are:
206
+ #
207
+ # * `raw` - Uncompressed 16-bit signed little-endian samples.
208
+ # (LINEAR16)
209
+ # * `flac` - The [Free Lossless Audio
210
+ # Codec](http://flac.sourceforge.net/documentation.html) encoding.
211
+ # Only 16-bit samples are supported. Not all fields in STREAMINFO
212
+ # are supported. (FLAC)
213
+ # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
214
+ # G.711 PCMU/mu-law. (MULAW)
215
+ # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
216
+ # be 8000 Hz.) (AMR)
217
+ # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
218
+ # be 16000 Hz.) (AMR_WB)
219
+ #
220
+ # @param [Integer] sample_rate Sample rate in Hertz of the audio data
221
+ # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
222
+ # For best results, set the sampling rate of the audio source to 16000
223
+ # Hz. If that's not possible, use the native sample rate of the audio
224
+ # source (instead of re-sampling). Optional.
225
+ # @param [String] language The language of the supplied audio as a
226
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
227
+ # code. If not specified, the language defaults to "en-US". See
228
+ # [Language
229
+ # Support](https://cloud.google.com/speech/docs/best-practices#language_support)
230
+ # for a list of the currently supported language codes. Optional.
231
+ # @param [Integer] max_alternatives The maximum number of recognition
232
+ # hypotheses to be returned. Default is 1. The service may return
233
+ # fewer. Valid values are 0-30. Optional.
234
+ # @param [Boolean] profanity_filter When `true`, the service will
235
+ # attempt to filter out profanities, replacing all but the initial
236
+ # character in each filtered word with asterisks, e.g. "f***". Default
237
+ # is `false`.
238
+ # @param [Array<String>] phrases A list of strings containing words and
239
+ # phrases "hints" so that the speech recognition is more likely to
240
+ # recognize them. See [usage
241
+ # limits](https://cloud.google.com/speech/limits#content). Optional.
242
+ #
243
+ # @return [Array<Result>] The transcribed text of audio recognized.
244
+ #
245
+ # @example
246
+ # require "google/cloud"
247
+ #
248
+ # gcloud = Google::Cloud.new
249
+ # speech = gcloud.speech
250
+ #
251
+ # results = speech.recognize "path/to/audio.raw",
252
+ # encoding: :raw, sample_rate: 16000
253
+ #
254
+ # @example With a Google Cloud Storage URI:
255
+ # require "google/cloud"
256
+ #
257
+ # gcloud = Google::Cloud.new
258
+ # speech = gcloud.speech
259
+ #
260
+ # results = speech.recognize "gs://bucket-name/path/to/audio.raw",
261
+ # encoding: :raw, sample_rate: 16000
262
+ #
263
+ # @example With a Google Cloud Storage File object:
264
+ # require "google/cloud"
265
+ #
266
+ # gcloud = Google::Cloud.new
267
+ # storage = gcloud.storage
268
+ #
269
+ # bucket = storage.bucket "bucket-name"
270
+ # file = bucket.file "path/to/audio.raw"
271
+ #
272
+ # speech = gcloud.speech
273
+ #
274
+ # results = speech.recognize file, encoding: :raw,
275
+ # sample_rate: 16000,
276
+ # max_alternatives: 10
277
+ #
278
def recognize source, encoding: nil, sample_rate: nil, language: nil,
              max_alternatives: nil, profanity_filter: nil, phrases: nil
  ensure_service!

  # Turn the source into an Audio carrying any supplied overrides.
  input = audio source, encoding: encoding, sample_rate: sample_rate,
                        language: language

  # Encoding, sample rate and language are read back from the Audio
  # rather than taken straight from the arguments.
  settings = audio_config encoding: input.encoding,
                          sample_rate: input.sample_rate,
                          language: input.language,
                          max_alternatives: max_alternatives,
                          profanity_filter: profanity_filter,
                          phrases: phrases

  response = service.recognize_sync input.to_grpc, settings
  # Wrap every gRPC result in a Result.
  response.results.map { |entry| Result.from_grpc entry }
end
295
+
296
+ ##
297
+ # Performs asynchronous speech recognition. Requests are processed
298
+ # asynchronously, meaning a Job is returned once the audio data has been
299
+ # sent, and can be refreshed to retrieve recognition results once the
300
+ # audio data has been processed.
301
+ #
302
+ # @see https://cloud.google.com/speech/docs/basics#async-responses
303
+ # Asynchronous Speech API Responses
304
+ #
305
+ # @param [String, IO, Google::Cloud::Storage::File] source A string of
306
+ # the path to the audio file to be recognized, or a File or other IO
307
+ # object of the audio contents, or a Cloud Storage URI of the form
308
+ # `"gs://bucketname/path/to/document.ext"`; or an instance of
309
+ # Google::Cloud::Storage::File of the audio to be recognized.
310
+ # @param [String, Symbol] encoding Encoding of audio data to be
311
+ # recognized. Optional.
312
+ #
313
+ # Currently, the only acceptable value is:
314
+ #
315
+ # * `raw` - Uncompressed 16-bit signed little-endian samples.
316
+ # (LINEAR16)
317
+ #
318
+ # @param [Integer] sample_rate Sample rate in Hertz of the audio data
319
+ # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
320
+ # For best results, set the sampling rate of the audio source to 16000
321
+ # Hz. If that's not possible, use the native sample rate of the audio
322
+ # source (instead of re-sampling). Optional.
323
+ # @param [String] language The language of the supplied audio as a
324
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
325
+ # code. If not specified, the language defaults to "en-US". See
326
+ # [Language
327
+ # Support](https://cloud.google.com/speech/docs/best-practices#language_support)
328
+ # for a list of the currently supported language codes. Optional.
329
+ # @param [Integer] max_alternatives The maximum number of recognition
330
+ # hypotheses to be returned. Default is 1. The service may return
331
+ # fewer. Valid values are 0-30. Optional.
332
+ # @param [Boolean] profanity_filter When `true`, the service will
333
+ # attempt to filter out profanities, replacing all but the initial
334
+ # character in each filtered word with asterisks, e.g. "f***". Default
335
+ # is `false`.
336
+ # @param [Array<String>] phrases A list of strings containing words and
337
+ # phrases "hints" so that the speech recognition is more likely to
338
+ # recognize them. See [usage
339
+ # limits](https://cloud.google.com/speech/limits#content). Optional.
340
+ #
341
+ # @return [Job] A resource that represents the long-running, asynchronous
342
+ # processing of a speech-recognition operation.
343
+ #
344
+ # @example
345
+ # require "google/cloud"
346
+ #
347
+ # gcloud = Google::Cloud.new
348
+ # speech = gcloud.speech
349
+ #
350
+ # job = speech.recognize_job "path/to/audio.raw",
351
+ # encoding: :raw, sample_rate: 16000
352
+ #
353
+ # job.done? #=> false
354
+ # job.reload!
355
+ #
356
+ # @example With a Google Cloud Storage URI:
357
+ # require "google/cloud"
358
+ #
359
+ # gcloud = Google::Cloud.new
360
+ # speech = gcloud.speech
361
+ #
362
+ # job = speech.recognize_job "gs://bucket-name/path/to/audio.raw",
363
+ # encoding: :raw, sample_rate: 16000
364
+ #
365
+ # job.done? #=> false
366
+ # job.reload!
367
+ #
368
+ # @example With a Google Cloud Storage File object:
369
+ # require "google/cloud"
370
+ #
371
+ # gcloud = Google::Cloud.new
372
+ # storage = gcloud.storage
373
+ #
374
+ # bucket = storage.bucket "bucket-name"
375
+ # file = bucket.file "path/to/audio.raw"
376
+ #
377
+ # speech = gcloud.speech
378
+ #
379
+ # job = speech.recognize_job file, encoding: :raw,
380
+ # sample_rate: 16000,
381
+ # max_alternatives: 10
382
+ #
383
+ # job.done? #=> false
384
+ # job.reload!
385
+ #
386
def recognize_job source, encoding: nil, sample_rate: nil,
                  language: nil, max_alternatives: nil,
                  profanity_filter: nil, phrases: nil
  ensure_service!

  # Turn the source into an Audio carrying any supplied overrides.
  input = audio source, encoding: encoding, sample_rate: sample_rate,
                        language: language

  # Encoding, sample rate and language are read back from the Audio
  # rather than taken straight from the arguments.
  settings = audio_config encoding: input.encoding,
                          sample_rate: input.sample_rate,
                          language: input.language,
                          max_alternatives: max_alternatives,
                          profanity_filter: profanity_filter,
                          phrases: phrases

  operation = service.recognize_async input.to_grpc, settings
  # Hand the Job the service as well, so it can refresh itself later.
  Job.from_grpc operation, service
end
402
+
403
+ protected
404
+
405
def audio_config encoding: nil, sample_rate: nil, language: nil,
                 max_alternatives: nil, profanity_filter: nil,
                 phrases: nil
  # Phrase hints are wrapped in their own SpeechContext message; without
  # hints no context is built at all.
  hints = phrases ? V1beta1::SpeechContext.new(phrases: phrases) : nil

  fields = {
    encoding: convert_encoding(encoding),
    sample_rate: sample_rate,
    language_code: language,
    max_alternatives: max_alternatives,
    profanity_filter: profanity_filter,
    speech_context: hints
  }

  # Drop nil entries so only explicitly provided options reach
  # RecognitionConfig (a `false` value is kept).
  V1beta1::RecognitionConfig.new(fields.reject { |_, value| value.nil? })
end
419
+
420
##
# @private Translates a friendly encoding name into the enum symbol used
# in the recognition config.
#
# The lookup ignores case and accepts both Strings and Symbols, so
# `:raw`, `"raw"` and `"RAW"` all become `:LINEAR16`, and `"flac"`,
# `"FLAC"` and `:FLAC` all become `:FLAC`.
#
# @param [String, Symbol, nil] encoding The encoding name to translate.
#
# @return [Symbol, Object] The mapped enum symbol, or `encoding` itself,
#   unchanged, when it is not one of the known names (including `nil`).
def convert_encoding encoding
  mapping = { raw: :LINEAR16, linear: :LINEAR16, linear16: :LINEAR16,
              flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB }
  # The table is keyed by lowercase symbols; normalize before looking up
  # so string and upper-case spellings resolve too.
  key = encoding.to_s.downcase.to_sym
  mapping[key] || encoding
end
425
+
426
+ ##
427
+ # @private Raise an error unless an active connection to the service is
428
+ # available.
429
def ensure_service!
  # Nothing to do when a service object is present.
  return if service

  raise "Must have active connection to service"
end
432
+ end
433
+ end
434
+ end
435
+ end