google-cloud-speech 0.23.0 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +0 -2
- data/README.md +4 -2
- data/lib/google-cloud-speech.rb +6 -2
- data/lib/google/cloud/speech.rb +37 -23
- data/lib/google/cloud/speech/audio.rb +73 -44
- data/lib/google/cloud/speech/credentials.rb +2 -2
- data/lib/google/cloud/speech/operation.rb +262 -0
- data/lib/google/cloud/speech/project.rb +186 -83
- data/lib/google/cloud/speech/result.rb +14 -8
- data/lib/google/cloud/speech/service.rb +12 -6
- data/lib/google/cloud/speech/stream.rb +128 -131
- data/lib/google/cloud/speech/{v1beta1.rb → v1.rb} +2 -3
- data/lib/google/cloud/speech/v1/cloud_speech_pb.rb +116 -0
- data/lib/google/cloud/speech/{v1beta1 → v1}/cloud_speech_services_pb.rb +11 -11
- data/lib/google/cloud/speech/{v1beta1/doc/google/cloud/speech/v1beta1 → v1/doc/google/cloud/speech/v1}/cloud_speech.rb +157 -161
- data/lib/google/cloud/speech/{v1beta1 → v1}/doc/google/protobuf/any.rb +0 -0
- data/lib/google/cloud/speech/{v1beta1 → v1}/doc/google/rpc/status.rb +0 -0
- data/lib/google/cloud/speech/{v1beta1 → v1}/speech_client.rb +71 -58
- data/lib/google/cloud/speech/{v1beta1 → v1}/speech_client_config.json +8 -8
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +13 -13
- data/lib/google/cloud/speech/job.rb +0 -159
- data/lib/google/cloud/speech/v1beta1/cloud_speech_pb.rb +0 -116
@@ -18,7 +18,7 @@ require "google/cloud/env"
|
|
18
18
|
require "google/cloud/speech/service"
|
19
19
|
require "google/cloud/speech/audio"
|
20
20
|
require "google/cloud/speech/result"
|
21
|
-
require "google/cloud/speech/job"
|
21
|
+
require "google/cloud/speech/operation"
|
22
22
|
require "google/cloud/speech/stream"
|
23
23
|
|
24
24
|
module Google
|
@@ -44,7 +44,9 @@ module Google
|
|
44
44
|
# speech = Google::Cloud::Speech.new
|
45
45
|
#
|
46
46
|
# audio = speech.audio "path/to/audio.raw",
|
47
|
-
# encoding: :raw,
|
47
|
+
# encoding: :raw,
|
48
|
+
# language: "en-US",
|
49
|
+
# sample_rate: 16000
|
48
50
|
# results = audio.recognize
|
49
51
|
#
|
50
52
|
# result = results.first
|
@@ -120,18 +122,27 @@ module Google
|
|
120
122
|
# be 8000 Hz.) (AMR)
|
121
123
|
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
122
124
|
# be 16000 Hz.) (AMR_WB)
|
123
|
-
#
|
125
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
126
|
+
#
|
127
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
128
|
+
# speech transcription.
|
129
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
130
|
+
#
|
131
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
132
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
133
|
+
# OGG_OPUS is preferred.
|
134
|
+
#
|
135
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
136
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
137
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
138
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
139
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
140
|
+
# of the currently supported language codes. Optional.
|
124
141
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
125
142
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
126
143
|
# For best results, set the sampling rate of the audio source to 16000
|
127
144
|
# Hz. If that's not possible, use the native sample rate of the audio
|
128
145
|
# source (instead of re-sampling). Optional.
|
129
|
-
# @param [String] language The language of the supplied audio as a
|
130
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
131
|
-
# code. If not specified, the language defaults to "en-US". See
|
132
|
-
# [Language
|
133
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
134
|
-
# for a list of the currently supported language codes. Optional.
|
135
146
|
#
|
136
147
|
# @return [Audio] The audio file to be recognized.
|
137
148
|
#
|
@@ -141,7 +152,9 @@ module Google
|
|
141
152
|
# speech = Google::Cloud::Speech.new
|
142
153
|
#
|
143
154
|
# audio = speech.audio "path/to/audio.raw",
|
144
|
-
# encoding: :raw,
|
155
|
+
# encoding: :raw,
|
156
|
+
# language: "en-US",
|
157
|
+
# sample_rate: 16000
|
145
158
|
#
|
146
159
|
# @example With a Google Cloud Storage URI:
|
147
160
|
# require "google/cloud/speech"
|
@@ -149,7 +162,9 @@ module Google
|
|
149
162
|
# speech = Google::Cloud::Speech.new
|
150
163
|
#
|
151
164
|
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
152
|
-
# encoding: :raw,
|
165
|
+
# encoding: :raw,
|
166
|
+
# language: "en-US",
|
167
|
+
# sample_rate: 16000
|
153
168
|
#
|
154
169
|
# @example With a Google Cloud Storage File object:
|
155
170
|
# require "google/cloud/storage"
|
@@ -163,17 +178,20 @@ module Google
|
|
163
178
|
#
|
164
179
|
# speech = Google::Cloud::Speech.new
|
165
180
|
#
|
166
|
-
# audio = speech.audio file,
|
181
|
+
# audio = speech.audio file,
|
182
|
+
# encoding: :raw,
|
183
|
+
# language: "en-US",
|
184
|
+
# sample_rate: 16000
|
167
185
|
#
|
168
|
-
def audio source, encoding: nil,
|
186
|
+
def audio source, encoding: nil, language: nil, sample_rate: nil
|
169
187
|
if source.is_a? Audio
|
170
188
|
audio = source.dup
|
171
189
|
else
|
172
190
|
audio = Audio.from_source source, self
|
173
191
|
end
|
174
192
|
audio.encoding = encoding unless encoding.nil?
|
175
|
-
audio.sample_rate = sample_rate unless sample_rate.nil?
|
176
193
|
audio.language = language unless language.nil?
|
194
|
+
audio.sample_rate = sample_rate unless sample_rate.nil?
|
177
195
|
audio
|
178
196
|
end
|
179
197
|
|
@@ -216,18 +234,27 @@ module Google
|
|
216
234
|
# be 8000 Hz.) (AMR)
|
217
235
|
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
218
236
|
# be 16000 Hz.) (AMR_WB)
|
219
|
-
#
|
237
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
238
|
+
#
|
239
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
240
|
+
# speech transcription.
|
241
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
242
|
+
#
|
243
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
244
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
245
|
+
# OGG_OPUS is preferred.
|
246
|
+
#
|
247
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
248
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
249
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
250
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
251
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
252
|
+
# of the currently supported language codes. Optional.
|
220
253
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
221
254
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
222
255
|
# For best results, set the sampling rate of the audio source to 16000
|
223
256
|
# Hz. If that's not possible, use the native sample rate of the audio
|
224
257
|
# source (instead of re-sampling). Optional.
|
225
|
-
# @param [String] language The language of the supplied audio as a
|
226
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
227
|
-
# code. If not specified, the language defaults to "en-US". See
|
228
|
-
# [Language
|
229
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
230
|
-
# for a list of the currently supported language codes. Optional.
|
231
258
|
# @param [Integer] max_alternatives The maximum number of recognition
|
232
259
|
# hypotheses to be returned. Default is 1. The service may return
|
233
260
|
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
@@ -248,7 +275,9 @@ module Google
|
|
248
275
|
# speech = Google::Cloud::Speech.new
|
249
276
|
#
|
250
277
|
# results = speech.recognize "path/to/audio.raw",
|
251
|
-
# encoding: :raw,
|
278
|
+
# encoding: :raw,
|
279
|
+
# language: "en-US",
|
280
|
+
# sample_rate: 16000
|
252
281
|
#
|
253
282
|
# @example With a Google Cloud Storage URI:
|
254
283
|
# require "google/cloud/speech"
|
@@ -256,7 +285,9 @@ module Google
|
|
256
285
|
# speech = Google::Cloud::Speech.new
|
257
286
|
#
|
258
287
|
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
259
|
-
# encoding: :raw,
|
288
|
+
# encoding: :raw,
|
289
|
+
# language: "en-US",
|
290
|
+
# sample_rate: 16000
|
260
291
|
#
|
261
292
|
# @example With a Google Cloud Storage File object:
|
262
293
|
# require "google/cloud/storage"
|
@@ -270,16 +301,18 @@ module Google
|
|
270
301
|
#
|
271
302
|
# speech = Google::Cloud::Speech.new
|
272
303
|
#
|
273
|
-
# results = speech.recognize file,
|
304
|
+
# results = speech.recognize file,
|
305
|
+
# encoding: :raw,
|
306
|
+
# language: "en-US",
|
274
307
|
# sample_rate: 16000,
|
275
308
|
# max_alternatives: 10
|
276
309
|
#
|
277
|
-
def recognize source, encoding: nil,
|
310
|
+
def recognize source, encoding: nil, language: nil, sample_rate: nil,
|
278
311
|
max_alternatives: nil, profanity_filter: nil, phrases: nil
|
279
312
|
ensure_service!
|
280
313
|
|
281
|
-
audio_obj = audio source, encoding: encoding,
|
282
|
-
sample_rate: sample_rate
|
314
|
+
audio_obj = audio source, encoding: encoding, language: language,
|
315
|
+
sample_rate: sample_rate
|
283
316
|
|
284
317
|
config = audio_config(
|
285
318
|
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
@@ -294,9 +327,9 @@ module Google
|
|
294
327
|
|
295
328
|
##
|
296
329
|
# Performs asynchronous speech recognition. Requests are processed
|
297
|
-
# asynchronously, meaning a
|
298
|
-
# sent, and can be refreshed to retrieve recognition results
|
299
|
-
# audio data has been processed.
|
330
|
+
# asynchronously, meaning an Operation is returned once the audio data
|
331
|
+
# has been sent, and can be refreshed to retrieve recognition results
|
332
|
+
# once the audio data has been processed.
|
300
333
|
#
|
301
334
|
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
302
335
|
# Asynchronous Speech API Responses
|
@@ -309,22 +342,41 @@ module Google
|
|
309
342
|
# @param [String, Symbol] encoding Encoding of audio data to be
|
310
343
|
# recognized. Optional.
|
311
344
|
#
|
312
|
-
#
|
345
|
+
# Acceptable values are:
|
313
346
|
#
|
314
347
|
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
315
348
|
# (LINEAR16)
|
316
|
-
#
|
349
|
+
# * `flac` - The [Free Lossless Audio
|
350
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
351
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
352
|
+
# are supported. (FLAC)
|
353
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
354
|
+
# G.711 PCMU/mu-law. (MULAW)
|
355
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
356
|
+
# be 8000 Hz.) (AMR)
|
357
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
358
|
+
# be 16000 Hz.) (AMR_WB)
|
359
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
360
|
+
#
|
361
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
362
|
+
# speech transcription.
|
363
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
364
|
+
#
|
365
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
366
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
367
|
+
# OGG_OPUS is preferred.
|
368
|
+
#
|
369
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
370
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
371
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
372
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
373
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
374
|
+
# of the currently supported language codes. Optional.
|
317
375
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
318
376
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
319
377
|
# For best results, set the sampling rate of the audio source to 16000
|
320
378
|
# Hz. If that's not possible, use the native sample rate of the audio
|
321
379
|
# source (instead of re-sampling). Optional.
|
322
|
-
# @param [String] language The language of the supplied audio as a
|
323
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
324
|
-
# code. If not specified, the language defaults to "en-US". See
|
325
|
-
# [Language
|
326
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
327
|
-
# for a list of the currently supported language codes. Optional.
|
328
380
|
# @param [Integer] max_alternatives The maximum number of recognition
|
329
381
|
# hypotheses to be returned. Default is 1. The service may return
|
330
382
|
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
@@ -337,30 +389,34 @@ module Google
|
|
337
389
|
# recognize them. See [usage
|
338
390
|
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
339
391
|
#
|
340
|
-
# @return [
|
341
|
-
# processing of a speech-recognition operation.
|
392
|
+
# @return [Operation] A resource that represents the long-running,
|
393
|
+
# asynchronous processing of a speech-recognition operation.
|
342
394
|
#
|
343
395
|
# @example
|
344
396
|
# require "google/cloud/speech"
|
345
397
|
#
|
346
398
|
# speech = Google::Cloud::Speech.new
|
347
399
|
#
|
348
|
-
#
|
349
|
-
#
|
400
|
+
# op = speech.process "path/to/audio.raw",
|
401
|
+
# encoding: :raw,
|
402
|
+
# language: "en-US",
|
403
|
+
# sample_rate: 16000
|
350
404
|
#
|
351
|
-
#
|
352
|
-
#
|
405
|
+
# op.done? #=> false
|
406
|
+
# op.reload!
|
353
407
|
#
|
354
408
|
# @example With a Google Cloud Storage URI:
|
355
409
|
# require "google/cloud/speech"
|
356
410
|
#
|
357
411
|
# speech = Google::Cloud::Speech.new
|
358
412
|
#
|
359
|
-
#
|
360
|
-
# encoding: :raw,
|
413
|
+
# op = speech.process "gs://bucket-name/path/to/audio.raw",
|
414
|
+
# encoding: :raw,
|
415
|
+
# language: "en-US",
|
416
|
+
# sample_rate: 16000
|
361
417
|
#
|
362
|
-
#
|
363
|
-
#
|
418
|
+
# op.done? #=> false
|
419
|
+
# op.reload!
|
364
420
|
#
|
365
421
|
# @example With a Google Cloud Storage File object:
|
366
422
|
# require "google/cloud/storage"
|
@@ -374,20 +430,21 @@ module Google
|
|
374
430
|
#
|
375
431
|
# speech = Google::Cloud::Speech.new
|
376
432
|
#
|
377
|
-
#
|
433
|
+
# op = speech.process file,
|
434
|
+
# encoding: :raw,
|
435
|
+
# language: "en-US",
|
378
436
|
# sample_rate: 16000,
|
379
437
|
# max_alternatives: 10
|
380
438
|
#
|
381
|
-
#
|
382
|
-
#
|
439
|
+
# op.done? #=> false
|
440
|
+
# op.reload!
|
383
441
|
#
|
384
|
-
def
|
385
|
-
|
386
|
-
profanity_filter: nil, phrases: nil
|
442
|
+
def process source, encoding: nil, sample_rate: nil, language: nil,
|
443
|
+
max_alternatives: nil, profanity_filter: nil, phrases: nil
|
387
444
|
ensure_service!
|
388
445
|
|
389
|
-
audio_obj = audio source, encoding: encoding,
|
390
|
-
sample_rate: sample_rate
|
446
|
+
audio_obj = audio source, encoding: encoding, language: language,
|
447
|
+
sample_rate: sample_rate
|
391
448
|
|
392
449
|
config = audio_config(
|
393
450
|
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
@@ -395,8 +452,10 @@ module Google
|
|
395
452
|
profanity_filter: profanity_filter, phrases: phrases)
|
396
453
|
|
397
454
|
grpc = service.recognize_async audio_obj.to_grpc, config
|
398
|
-
|
455
|
+
Operation.from_grpc grpc
|
399
456
|
end
|
457
|
+
alias_method :long_running_recognize, :process
|
458
|
+
alias_method :recognize_job, :process
|
400
459
|
|
401
460
|
##
|
402
461
|
# Creates a Stream object to perform bidirectional streaming
|
@@ -422,18 +481,27 @@ module Google
|
|
422
481
|
# be 8000 Hz.) (AMR)
|
423
482
|
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
424
483
|
# be 16000 Hz.) (AMR_WB)
|
425
|
-
#
|
484
|
+
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
485
|
+
#
|
486
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
487
|
+
# speech transcription.
|
488
|
+
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
489
|
+
#
|
490
|
+
# Lossy codecs are not recommended, as they result in a lower-quality
|
491
|
+
# speech transcription. If you must use a low-bitrate encoder,
|
492
|
+
# OGG_OPUS is preferred.
|
493
|
+
#
|
494
|
+
# @param [String,Symbol] language The language of the supplied audio as
|
495
|
+
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
496
|
+
# "en-US" for English (United States), "en-GB" for English (United
|
497
|
+
# Kingdom), "fr-FR" for French (France). See [Language
|
498
|
+
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
499
|
+
# of the currently supported language codes. Optional.
|
426
500
|
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
427
501
|
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
428
502
|
# For best results, set the sampling rate of the audio source to 16000
|
429
503
|
# Hz. If that's not possible, use the native sample rate of the audio
|
430
504
|
# source (instead of re-sampling). Optional.
|
431
|
-
# @param [String] language The language of the supplied audio as a
|
432
|
-
# [BCP-47](https://tools.ietf.org/html/bcp47) language
|
433
|
-
# code. If not specified, the language defaults to "en-US". See
|
434
|
-
# [Language
|
435
|
-
# Support](https://cloud.google.com/speech/docs/languages)
|
436
|
-
# for a list of the currently supported language codes. Optional.
|
437
505
|
# @param [Integer] max_alternatives The maximum number of recognition
|
438
506
|
# hypotheses to be returned. Default is 1. The service may return
|
439
507
|
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
@@ -462,14 +530,9 @@ module Google
|
|
462
530
|
#
|
463
531
|
# speech = Google::Cloud::Speech.new
|
464
532
|
#
|
465
|
-
# stream = speech.stream encoding: :raw,
|
466
|
-
#
|
467
|
-
#
|
468
|
-
# stream.on_result do |results|
|
469
|
-
# result = results.first
|
470
|
-
# puts result.transcript # "how old is the Brooklyn Bridge"
|
471
|
-
# puts result.confidence # 0.9826789498329163
|
472
|
-
# end
|
533
|
+
# stream = speech.stream encoding: :raw,
|
534
|
+
# language: "en-US",
|
535
|
+
# sample_rate: 16000
|
473
536
|
#
|
474
537
|
# # Stream 5 seconds of audio from the microphone
|
475
538
|
# # Actual implementation of microphone input varies by platform
|
@@ -478,18 +541,24 @@ module Google
|
|
478
541
|
# end
|
479
542
|
#
|
480
543
|
# stream.stop
|
544
|
+
# stream.wait_until_complete!
|
545
|
+
#
|
546
|
+
# results = stream.results
|
547
|
+
# result = results.first
|
548
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
549
|
+
# result.confidence #=> 0.9826789498329163
|
481
550
|
#
|
482
|
-
def stream encoding: nil,
|
551
|
+
def stream encoding: nil, language: nil, sample_rate: nil,
|
483
552
|
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
484
553
|
utterance: nil, interim: nil
|
485
554
|
ensure_service!
|
486
555
|
|
487
|
-
grpc_req =
|
488
|
-
streaming_config:
|
556
|
+
grpc_req = V1::StreamingRecognizeRequest.new(
|
557
|
+
streaming_config: V1::StreamingRecognitionConfig.new(
|
489
558
|
{
|
490
559
|
config: audio_config(encoding: convert_encoding(encoding),
|
491
|
-
sample_rate: sample_rate,
|
492
560
|
language: language,
|
561
|
+
sample_rate: sample_rate,
|
493
562
|
max_alternatives: max_alternatives,
|
494
563
|
profanity_filter: profanity_filter,
|
495
564
|
phrases: phrases),
|
@@ -501,28 +570,62 @@ module Google
|
|
501
570
|
|
502
571
|
Stream.new service, grpc_req
|
503
572
|
end
|
573
|
+
alias_method :stream_recognize, :stream
|
574
|
+
|
575
|
+
##
|
576
|
+
# Retrieves an existing long-running speech-recognition Operation by
|
577
|
+
# its unique identifier. The returned Operation can be checked with
|
578
|
+
# `done?` and refreshed with `reload!` to retrieve recognition results
|
579
|
+
# once the audio data has been processed.
|
580
|
+
#
|
581
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
|
582
|
+
# Long-running Operation
|
583
|
+
#
|
584
|
+
# @param [String] id The unique identifier for the long running
|
585
|
+
# operation. Required.
|
586
|
+
#
|
587
|
+
# @return [Operation] A resource that represents the long-running,
|
588
|
+
# asynchronous processing of a speech-recognition operation.
|
589
|
+
#
|
590
|
+
# @example
|
591
|
+
# require "google/cloud/speech"
|
592
|
+
#
|
593
|
+
# speech = Google::Cloud::Speech.new
|
594
|
+
#
|
595
|
+
# op = speech.operation "1234567890"
|
596
|
+
#
|
597
|
+
# op.done? #=> false
|
598
|
+
# op.reload!
|
599
|
+
#
|
600
|
+
def operation id
|
601
|
+
ensure_service!
|
602
|
+
|
603
|
+
grpc = service.get_op id
|
604
|
+
Operation.from_grpc grpc
|
605
|
+
end
|
504
606
|
|
505
607
|
protected
|
506
608
|
|
507
|
-
def audio_config encoding: nil,
|
609
|
+
def audio_config encoding: nil, language: nil, sample_rate: nil,
|
508
610
|
max_alternatives: nil, profanity_filter: nil,
|
509
611
|
phrases: nil
|
510
|
-
|
511
|
-
|
612
|
+
contexts = nil
|
613
|
+
contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
|
512
614
|
language = String(language) unless language.nil?
|
513
|
-
|
615
|
+
V1::RecognitionConfig.new({
|
514
616
|
encoding: convert_encoding(encoding),
|
515
|
-
sample_rate: sample_rate,
|
516
617
|
language_code: language,
|
618
|
+
sample_rate_hertz: sample_rate,
|
517
619
|
max_alternatives: max_alternatives,
|
518
620
|
profanity_filter: profanity_filter,
|
519
|
-
|
621
|
+
speech_contexts: contexts
|
520
622
|
}.delete_if { |_, v| v.nil? })
|
521
623
|
end
|
522
624
|
|
523
625
|
def convert_encoding encoding
|
524
626
|
mapping = { raw: :LINEAR16, linear: :LINEAR16, linear16: :LINEAR16,
|
525
|
-
flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB
|
627
|
+
flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB,
|
628
|
+
ogg_opus: :OGG_OPUS, speex: :SPEEX_WITH_HEADER_BYTE }
|
526
629
|
mapping[encoding] || encoding
|
527
630
|
end
|
528
631
|
|