google-cloud-speech 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +0 -2
- data/README.md +4 -2
- data/lib/google-cloud-speech.rb +6 -2
- data/lib/google/cloud/speech.rb +37 -23
- data/lib/google/cloud/speech/audio.rb +73 -44
- data/lib/google/cloud/speech/credentials.rb +2 -2
- data/lib/google/cloud/speech/operation.rb +262 -0
- data/lib/google/cloud/speech/project.rb +186 -83
- data/lib/google/cloud/speech/result.rb +14 -8
- data/lib/google/cloud/speech/service.rb +12 -6
- data/lib/google/cloud/speech/stream.rb +128 -131
- data/lib/google/cloud/speech/{v1beta1.rb → v1.rb} +2 -3
- data/lib/google/cloud/speech/v1/cloud_speech_pb.rb +116 -0
- data/lib/google/cloud/speech/{v1beta1 → v1}/cloud_speech_services_pb.rb +11 -11
- data/lib/google/cloud/speech/{v1beta1/doc/google/cloud/speech/v1beta1 → v1/doc/google/cloud/speech/v1}/cloud_speech.rb +157 -161
- data/lib/google/cloud/speech/{v1beta1 → v1}/doc/google/protobuf/any.rb +0 -0
- data/lib/google/cloud/speech/{v1beta1 → v1}/doc/google/rpc/status.rb +0 -0
- data/lib/google/cloud/speech/{v1beta1 → v1}/speech_client.rb +71 -58
- data/lib/google/cloud/speech/{v1beta1 → v1}/speech_client_config.json +8 -8
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +13 -13
- data/lib/google/cloud/speech/job.rb +0 -159
- data/lib/google/cloud/speech/v1beta1/cloud_speech_pb.rb +0 -116
@@ -18,7 +18,7 @@ require "google/cloud/env"
|
|
18
18
|
require "google/cloud/speech/service"
|
19
19
|
require "google/cloud/speech/audio"
|
20
20
|
require "google/cloud/speech/result"
|
21
|
-
require "google/cloud/speech/job"
|
21
|
+
require "google/cloud/speech/operation"
|
22
22
|
require "google/cloud/speech/stream"
|
23
23
|
|
24
24
|
module Google
|
@@ -44,7 +44,9 @@ module Google
|
|
44
44
|
# speech = Google::Cloud::Speech.new
|
45
45
|
#
|
46
46
|
# audio = speech.audio "path/to/audio.raw",
|
47
|
-
# encoding: :raw,
|
47
|
+
# encoding: :raw,
|
48
|
+
# language: "en-US",
|
49
|
+
# sample_rate: 16000
|
48
50
|
# results = audio.recognize
|
49
51
|
#
|
50
52
|
# result = results.first
|
@@ -120,18 +122,27 @@ module Google
|
|
120
122
|
# be 8000 Hz.) (AMR)
|
121
123
|
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
122
124
|
# be 16000 Hz.) (AMR_WB)
|
123
|
-
#
|
125
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
126
|
+
#
|
127
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
128
|
+
# speech transcription.
|
129
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
130
|
+
#
|
131
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
132
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
133
|
+
# OGG_OPUS is preferred.
|
134
|
+
#
|
135
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
136
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
137
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
138
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
139
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
140
|
+
# of the currently supported language codes. Optional.
|
124
141
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
125
142
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
126
143
|
# For best results, set the sampling rate of the audio source to 16000
|
127
144
|
# Hz. If that's not possible, use the native sample rate of the audio
|
128
145
|
# source (instead of re-sampling). Optional.
|
129
|
-
# @param [String] language The language of the supplied audio as a
|
130
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
131
|
-
# code. If not specified, the language defaults to "en-US". See
|
132
|
-
# [Language
|
133
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
134
|
-
# for a list of the currently supported language codes. Optional.
|
135
146
|
#
|
136
147
|
# @return [Audio] The audio file to be recognized.
|
137
148
|
#
|
@@ -141,7 +152,9 @@ module Google
|
|
141
152
|
# speech = Google::Cloud::Speech.new
|
142
153
|
#
|
143
154
|
# audio = speech.audio "path/to/audio.raw",
|
144
|
-
# encoding: :raw,
|
155
|
+
# encoding: :raw,
|
156
|
+
# language: "en-US",
|
157
|
+
# sample_rate: 16000
|
145
158
|
#
|
146
159
|
# @example With a Google Cloud Storage URI:
|
147
160
|
# require "google/cloud/speech"
|
@@ -149,7 +162,9 @@ module Google
|
|
149
162
|
# speech = Google::Cloud::Speech.new
|
150
163
|
#
|
151
164
|
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
152
|
-
# encoding: :raw,
|
165
|
+
# encoding: :raw,
|
166
|
+
# language: "en-US",
|
167
|
+
# sample_rate: 16000
|
153
168
|
#
|
154
169
|
# @example With a Google Cloud Storage File object:
|
155
170
|
# require "google/cloud/storage"
|
@@ -163,17 +178,20 @@ module Google
|
|
163
178
|
#
|
164
179
|
# speech = Google::Cloud::Speech.new
|
165
180
|
#
|
166
|
-
# audio = speech.audio file,
|
181
|
+
# audio = speech.audio file,
|
182
|
+
# encoding: :raw,
|
183
|
+
# language: "en-US",
|
184
|
+
# sample_rate: 16000
|
167
185
|
#
|
168
|
-
def audio source, encoding: nil,
|
186
|
+
def audio source, encoding: nil, language: nil, sample_rate: nil
|
169
187
|
if source.is_a? Audio
|
170
188
|
audio = source.dup
|
171
189
|
else
|
172
190
|
audio = Audio.from_source source, self
|
173
191
|
end
|
174
192
|
audio.encoding = encoding unless encoding.nil?
|
175
|
-
audio.sample_rate = sample_rate unless sample_rate.nil?
|
176
193
|
audio.language = language unless language.nil?
|
194
|
+
audio.sample_rate = sample_rate unless sample_rate.nil?
|
177
195
|
audio
|
178
196
|
end
|
179
197
|
|
@@ -216,18 +234,27 @@ module Google
|
|
216
234
|
# be 8000 Hz.) (AMR)
|
217
235
|
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
218
236
|
# be 16000 Hz.) (AMR_WB)
|
219
|
-
#
|
237
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
238
|
+
#
|
239
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
240
|
+
# speech transcription.
|
241
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
242
|
+
#
|
243
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
244
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
245
|
+
# OGG_OPUS is preferred.
|
246
|
+
#
|
247
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
248
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
249
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
250
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
251
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
252
|
+
# of the currently supported language codes. Optional.
|
220
253
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
221
254
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
222
255
|
# For best results, set the sampling rate of the audio source to 16000
|
223
256
|
# Hz. If that's not possible, use the native sample rate of the audio
|
224
257
|
# source (instead of re-sampling). Optional.
|
225
|
-
# @param [String] language The language of the supplied audio as a
|
226
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
227
|
-
# code. If not specified, the language defaults to "en-US". See
|
228
|
-
# [Language
|
229
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
230
|
-
# for a list of the currently supported language codes. Optional.
|
231
258
|
# @param [Integer] max_alternatives The maximum number of recognition
|
232
259
|
# hypotheses to be returned. Default is 1. The service may return
|
233
260
|
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
@@ -248,7 +275,9 @@ module Google
|
|
248
275
|
# speech = Google::Cloud::Speech.new
|
249
276
|
#
|
250
277
|
# results = speech.recognize "path/to/audio.raw",
|
251
|
-
# encoding: :raw,
|
278
|
+
# encoding: :raw,
|
279
|
+
# language: "en-US",
|
280
|
+
# sample_rate: 16000
|
252
281
|
#
|
253
282
|
# @example With a Google Cloud Storage URI:
|
254
283
|
# require "google/cloud/speech"
|
@@ -256,7 +285,9 @@ module Google
|
|
256
285
|
# speech = Google::Cloud::Speech.new
|
257
286
|
#
|
258
287
|
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
259
|
-
# encoding: :raw,
|
288
|
+
# encoding: :raw,
|
289
|
+
# language: "en-US",
|
290
|
+
# sample_rate: 16000
|
260
291
|
#
|
261
292
|
# @example With a Google Cloud Storage File object:
|
262
293
|
# require "google/cloud/storage"
|
@@ -270,16 +301,18 @@ module Google
|
|
270
301
|
#
|
271
302
|
# speech = Google::Cloud::Speech.new
|
272
303
|
#
|
273
|
-
# results = speech.recognize file,
|
304
|
+
# results = speech.recognize file,
|
305
|
+
# encoding: :raw,
|
306
|
+
# language: "en-US",
|
274
307
|
# sample_rate: 16000,
|
275
308
|
# max_alternatives: 10
|
276
309
|
#
|
277
|
-
def recognize source, encoding: nil,
|
310
|
+
def recognize source, encoding: nil, language: nil, sample_rate: nil,
|
278
311
|
max_alternatives: nil, profanity_filter: nil, phrases: nil
|
279
312
|
ensure_service!
|
280
313
|
|
281
|
-
audio_obj = audio source, encoding: encoding,
|
282
|
-
sample_rate: sample_rate
|
314
|
+
audio_obj = audio source, encoding: encoding, language: language,
|
315
|
+
sample_rate: sample_rate
|
283
316
|
|
284
317
|
config = audio_config(
|
285
318
|
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
@@ -294,9 +327,9 @@ module Google
|
|
294
327
|
|
295
328
|
##
|
296
329
|
# Performs asynchronous speech recognition. Requests are processed
|
297
|
-
# asynchronously, meaning a
|
298
|
-
# sent, and can be refreshed to retrieve recognition results
|
299
|
-
# audio data has been processed.
|
330
|
+
# asynchronously, meaning an Operation is returned once the audio data
|
331
|
+
# has been sent, and can be refreshed to retrieve recognition results
|
332
|
+
# once the audio data has been processed.
|
300
333
|
#
|
301
334
|
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
302
335
|
# Asynchronous Speech API Responses
|
@@ -309,22 +342,41 @@ module Google
|
|
309
342
|
# @param [String, Symbol] encoding Encoding of audio data to be
|
310
343
|
# recognized. Optional.
|
311
344
|
#
|
312
|
-
#
|
345
|
+
# Acceptable values are:
|
313
346
|
#
|
314
347
|
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
315
348
|
# (LINEAR16)
|
316
|
-
#
|
349
|
+
# * `flac` - The [Free Lossless Audio
|
350
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
351
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
352
|
+
# are supported. (FLAC)
|
353
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
354
|
+
# G.711 PCMU/mu-law. (MULAW)
|
355
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
356
|
+
# be 8000 Hz.) (AMR)
|
357
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
358
|
+
# be 16000 Hz.) (AMR_WB)
|
359
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
360
|
+
#
|
361
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
362
|
+
# speech transcription.
|
363
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
364
|
+
#
|
365
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
366
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
367
|
+
# OGG_OPUS is preferred.
|
368
|
+
#
|
369
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
370
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
371
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
372
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
373
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
374
|
+
# of the currently supported language codes. Optional.
|
317
375
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
318
376
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
319
377
|
# For best results, set the sampling rate of the audio source to 16000
|
320
378
|
# Hz. If that's not possible, use the native sample rate of the audio
|
321
379
|
# source (instead of re-sampling). Optional.
|
322
|
-
# @param [String] language The language of the supplied audio as a
|
323
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
324
|
-
# code. If not specified, the language defaults to "en-US". See
|
325
|
-
# [Language
|
326
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
327
|
-
# for a list of the currently supported language codes. Optional.
|
328
380
|
# @param [Integer] max_alternatives The maximum number of recognition
|
329
381
|
# hypotheses to be returned. Default is 1. The service may return
|
330
382
|
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
@@ -337,30 +389,34 @@ module Google
|
|
337
389
|
# recognize them. See [usage
|
338
390
|
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
339
391
|
#
|
340
|
-
# @return [
|
341
|
-
# processing of a speech-recognition operation.
|
392
|
+
# @return [Operation] A resource that represents the long-running,
|
393
|
+
# asynchronous processing of a speech-recognition operation.
|
342
394
|
#
|
343
395
|
# @example
|
344
396
|
# require "google/cloud/speech"
|
345
397
|
#
|
346
398
|
# speech = Google::Cloud::Speech.new
|
347
399
|
#
|
348
|
-
#
|
349
|
-
#
|
400
|
+
# op = speech.process "path/to/audio.raw",
|
401
|
+
# encoding: :raw,
|
402
|
+
# language: "en-US",
|
403
|
+
# sample_rate: 16000
|
350
404
|
#
|
351
|
-
#
|
352
|
-
#
|
405
|
+
# op.done? #=> false
|
406
|
+
# op.reload!
|
353
407
|
#
|
354
408
|
# @example With a Google Cloud Storage URI:
|
355
409
|
# require "google/cloud/speech"
|
356
410
|
#
|
357
411
|
# speech = Google::Cloud::Speech.new
|
358
412
|
#
|
359
|
-
#
|
360
|
-
# encoding: :raw,
|
413
|
+
# op = speech.process "gs://bucket-name/path/to/audio.raw",
|
414
|
+
# encoding: :raw,
|
415
|
+
# language: "en-US",
|
416
|
+
# sample_rate: 16000
|
361
417
|
#
|
362
|
-
#
|
363
|
-
#
|
418
|
+
# op.done? #=> false
|
419
|
+
# op.reload!
|
364
420
|
#
|
365
421
|
# @example With a Google Cloud Storage File object:
|
366
422
|
# require "google/cloud/storage"
|
@@ -374,20 +430,21 @@ module Google
|
|
374
430
|
#
|
375
431
|
# speech = Google::Cloud::Speech.new
|
376
432
|
#
|
377
|
-
#
|
433
|
+
# op = speech.process file,
|
434
|
+
# encoding: :raw,
|
435
|
+
# language: "en-US",
|
378
436
|
# sample_rate: 16000,
|
379
437
|
# max_alternatives: 10
|
380
438
|
#
|
381
|
-
#
|
382
|
-
#
|
439
|
+
# op.done? #=> false
|
440
|
+
# op.reload!
|
383
441
|
#
|
384
|
-
def
|
385
|
-
|
386
|
-
profanity_filter: nil, phrases: nil
|
442
|
+
def process source, encoding: nil, sample_rate: nil, language: nil,
|
443
|
+
max_alternatives: nil, profanity_filter: nil, phrases: nil
|
387
444
|
ensure_service!
|
388
445
|
|
389
|
-
audio_obj = audio source, encoding: encoding,
|
390
|
-
sample_rate: sample_rate
|
446
|
+
audio_obj = audio source, encoding: encoding, language: language,
|
447
|
+
sample_rate: sample_rate
|
391
448
|
|
392
449
|
config = audio_config(
|
393
450
|
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
@@ -395,8 +452,10 @@ module Google
|
|
395
452
|
profanity_filter: profanity_filter, phrases: phrases)
|
396
453
|
|
397
454
|
grpc = service.recognize_async audio_obj.to_grpc, config
|
398
|
-
|
455
|
+
Operation.from_grpc grpc
|
399
456
|
end
|
457
|
+
alias_method :long_running_recognize, :process
|
458
|
+
alias_method :recognize_job, :process
|
400
459
|
|
401
460
|
##
|
402
461
|
# Creates a Stream object to perform bidirectional streaming
|
@@ -422,18 +481,27 @@ module Google
|
|
422
481
|
# be 8000 Hz.) (AMR)
|
423
482
|
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
424
483
|
# be 16000 Hz.) (AMR_WB)
|
425
|
-
#
|
484
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
485
|
+
#
|
486
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
487
|
+
# speech transcription.
|
488
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
489
|
+
#
|
490
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
491
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
492
|
+
# OGG_OPUS is preferred.
|
493
|
+
#
|
494
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
495
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
496
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
497
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
498
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
499
|
+
# of the currently supported language codes. Optional.
|
426
500
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
427
501
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
428
502
|
# For best results, set the sampling rate of the audio source to 16000
|
429
503
|
# Hz. If that's not possible, use the native sample rate of the audio
|
430
504
|
# source (instead of re-sampling). Optional.
|
431
|
-
# @param [String] language The language of the supplied audio as a
|
432
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
433
|
-
# code. If not specified, the language defaults to "en-US". See
|
434
|
-
# [Language
|
435
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
436
|
-
# for a list of the currently supported language codes. Optional.
|
437
505
|
# @param [Integer] max_alternatives The maximum number of recognition
|
438
506
|
# hypotheses to be returned. Default is 1. The service may return
|
439
507
|
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
@@ -462,14 +530,9 @@ module Google
|
|
462
530
|
#
|
463
531
|
# speech = Google::Cloud::Speech.new
|
464
532
|
#
|
465
|
-
# stream = speech.stream encoding: :raw,
|
466
|
-
#
|
467
|
-
#
|
468
|
-
# stream.on_result do |results|
|
469
|
-
# result = results.first
|
470
|
-
# puts result.transcript # "how old is the Brooklyn Bridge"
|
471
|
-
# puts result.confidence # 0.9826789498329163
|
472
|
-
# end
|
533
|
+
# stream = speech.stream encoding: :raw,
|
534
|
+
# language: "en-US",
|
535
|
+
# sample_rate: 16000
|
473
536
|
#
|
474
537
|
# # Stream 5 seconds of audio from the microphone
|
475
538
|
# # Actual implementation of microphone input varies by platform
|
@@ -478,18 +541,24 @@ module Google
|
|
478
541
|
# end
|
479
542
|
#
|
480
543
|
# stream.stop
|
544
|
+
# stream.wait_until_complete!
|
545
|
+
#
|
546
|
+
# results = stream.results
|
547
|
+
# result = results.first
|
548
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
549
|
+
# result.confidence #=> 0.9826789498329163
|
481
550
|
#
|
482
|
-
def stream encoding: nil,
|
551
|
+
def stream encoding: nil, language: nil, sample_rate: nil,
|
483
552
|
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
484
553
|
utterance: nil, interim: nil
|
485
554
|
ensure_service!
|
486
555
|
|
487
|
-
grpc_req =
|
488
|
-
streaming_config:
|
556
|
+
grpc_req = V1::StreamingRecognizeRequest.new(
|
557
|
+
streaming_config: V1::StreamingRecognitionConfig.new(
|
489
558
|
{
|
490
559
|
config: audio_config(encoding: convert_encoding(encoding),
|
491
|
-
sample_rate: sample_rate,
|
492
560
|
language: language,
|
561
|
+
sample_rate: sample_rate,
|
493
562
|
max_alternatives: max_alternatives,
|
494
563
|
profanity_filter: profanity_filter,
|
495
564
|
phrases: phrases),
|
@@ -501,28 +570,62 @@ module Google
|
|
501
570
|
|
502
571
|
Stream.new service, grpc_req
|
503
572
|
end
|
573
|
+
alias_method :stream_recognize, :stream
|
574
|
+
|
575
|
+
##
|
576
|
+
# Retrieves an existing long-running speech recognition operation by
|
577
|
+
# its unique identifier. The returned Operation can be refreshed to
|
578
|
+
# retrieve recognition results once the audio data has been
|
579
|
+
# processed.
|
580
|
+
#
|
581
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
|
582
|
+
# Long-running Operation
|
583
|
+
#
|
584
|
+
# @param [String] id The unique identifier for the long running
|
585
|
+
# operation. Required.
|
586
|
+
#
|
587
|
+
# @return [Operation] A resource that represents the long-running,
|
588
|
+
# asynchronous processing of a speech-recognition operation.
|
589
|
+
#
|
590
|
+
# @example
|
591
|
+
# require "google/cloud/speech"
|
592
|
+
#
|
593
|
+
# speech = Google::Cloud::Speech.new
|
594
|
+
#
|
595
|
+
# op = speech.operation "1234567890"
|
596
|
+
#
|
597
|
+
# op.done? #=> false
|
598
|
+
# op.reload!
|
599
|
+
#
|
600
|
+
def operation id
|
601
|
+
ensure_service!
|
602
|
+
|
603
|
+
grpc = service.get_op id
|
604
|
+
Operation.from_grpc grpc
|
605
|
+
end
|
504
606
|
|
505
607
|
protected
|
506
608
|
|
507
|
-
def audio_config encoding: nil,
|
609
|
+
def audio_config encoding: nil, language: nil, sample_rate: nil,
|
508
610
|
max_alternatives: nil, profanity_filter: nil,
|
509
611
|
phrases: nil
|
510
|
-
|
511
|
-
|
612
|
+
contexts = nil
|
613
|
+
contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
|
512
614
|
language = String(language) unless language.nil?
|
513
|
-
|
615
|
+
V1::RecognitionConfig.new({
|
514
616
|
encoding: convert_encoding(encoding),
|
515
|
-
sample_rate: sample_rate,
|
516
617
|
language_code: language,
|
618
|
+
sample_rate_hertz: sample_rate,
|
517
619
|
max_alternatives: max_alternatives,
|
518
620
|
profanity_filter: profanity_filter,
|
519
|
-
|
621
|
+
speech_contexts: contexts
|
520
622
|
}.delete_if { |_, v| v.nil? })
|
521
623
|
end
|
522
624
|
|
523
625
|
def convert_encoding encoding
|
524
626
|
mapping = { raw: :LINEAR16, linear: :LINEAR16, linear16: :LINEAR16,
|
525
|
-
flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB
|
627
|
+
flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB,
|
628
|
+
ogg_opus: :OGG_OPUS, speex: :SPEEX_WITH_HEADER_BYTE }
|
526
629
|
mapping[encoding] || encoding
|
527
630
|
end
|
528
631
|
|