google-cloud-speech 0.23.0 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
- require "google/cloud/speech/v1beta1"
16
+ require "google/cloud/speech/v1"
17
17
 
18
18
  module Google
19
19
  module Cloud
@@ -23,9 +23,9 @@ module Google
23
23
  #
24
24
  # A speech recognition result corresponding to a portion of the audio.
25
25
  #
26
- # See {Project#recognize} and {Job#results}.
26
+ # See {Project#recognize} and {Operation#results}.
27
27
  #
28
- # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SpeechRecognitionResult
28
+ # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.SpeechRecognitionResult
29
29
  # SpeechRecognitionResult
30
30
  #
31
31
  # @attr_reader [String] transcript Transcript text representing the words
@@ -46,7 +46,9 @@ module Google
46
46
  # speech = Google::Cloud::Speech.new
47
47
  #
48
48
  # audio = speech.audio "path/to/audio.raw",
49
- # encoding: :raw, sample_rate: 16000
49
+ # encoding: :raw,
50
+ # language: "en-US",
51
+ # sample_rate: 16000
50
52
  # results = audio.recognize
51
53
  #
52
54
  # result = results.first
@@ -94,7 +96,9 @@ module Google
94
96
  # speech = Google::Cloud::Speech.new
95
97
  #
96
98
  # audio = speech.audio "path/to/audio.raw",
97
- # encoding: :raw, sample_rate: 16000
99
+ # encoding: :raw,
100
+ # language: "en-US",
101
+ # sample_rate: 16000
98
102
  # results = audio.recognize
99
103
  #
100
104
  # result = results.first
@@ -124,9 +128,9 @@ module Google
124
128
  #
125
129
  # See {Project#stream} and {Stream#on_interim}.
126
130
  #
127
- # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SpeechRecognitionResult
131
+ # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.SpeechRecognitionResult
128
132
  # SpeechRecognitionResult
129
- # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.StreamingRecognitionResult
133
+ # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.StreamingRecognitionResult
130
134
  # StreamingRecognitionResult
131
135
  #
132
136
  # @attr_reader [String] transcript Transcript text representing the words
@@ -150,7 +154,9 @@ module Google
150
154
  #
151
155
  # speech = Google::Cloud::Speech.new
152
156
  #
153
- # stream = speech.stream encoding: :raw, sample_rate: 16000
157
+ # stream = speech.stream encoding: :raw,
158
+ # language: "en-US",
159
+ # sample_rate: 16000
154
160
  #
155
161
  # # register callback for when an interim result is returned
156
162
  # stream.on_interim do |final_results, interim_results|
@@ -16,7 +16,7 @@
16
16
  require "google/cloud/errors"
17
17
  require "google/cloud/speech/credentials"
18
18
  require "google/cloud/speech/version"
19
- require "google/cloud/speech/v1beta1"
19
+ require "google/cloud/speech/v1"
20
20
 
21
21
  module Google
22
22
  module Cloud
@@ -33,7 +33,7 @@ module Google
33
33
  client_config: nil
34
34
  @project = project
35
35
  @credentials = credentials
36
- @host = host || V1beta1::SpeechClient::SERVICE_ADDRESS
36
+ @host = host || V1::SpeechClient::SERVICE_ADDRESS
37
37
  @timeout = timeout
38
38
  @client_config = client_config || {}
39
39
  end
@@ -53,7 +53,7 @@ module Google
53
53
  def service
54
54
  return mocked_service if mocked_service
55
55
  @service ||= \
56
- V1beta1::SpeechClient.new(
56
+ V1::SpeechClient.new(
57
57
  service_path: host,
58
58
  channel: channel,
59
59
  timeout: timeout,
@@ -82,13 +82,14 @@ module Google
82
82
 
83
83
  def recognize_sync audio, config
84
84
  execute do
85
- service.sync_recognize config, audio, options: default_options
85
+ service.recognize config, audio, options: default_options
86
86
  end
87
87
  end
88
88
 
89
89
  def recognize_async audio, config
90
90
  execute do
91
- service.async_recognize config, audio, options: default_options
91
+ service.long_running_recognize \
92
+ config, audio, options: default_options
92
93
  end
93
94
  end
94
95
 
@@ -98,7 +99,12 @@ module Google
98
99
  end
99
100
 
100
101
  def get_op name
101
- execute { ops.get_operation name }
102
+ execute do
103
+ Google::Gax::Operation.new \
104
+ ops.get_operation(name), ops,
105
+ V1::LongRunningRecognizeResponse,
106
+ V1::LongRunningRecognizeMetadata
107
+ end
102
108
  end
103
109
 
104
110
  def inspect
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
- require "google/cloud/speech/v1beta1"
16
+ require "google/cloud/speech/v1"
17
17
  require "google/cloud/speech/result"
18
18
  require "monitor"
19
19
  require "forwardable"
@@ -31,14 +31,9 @@ module Google
31
31
  #
32
32
  # speech = Google::Cloud::Speech.new
33
33
  #
34
- # stream = speech.stream encoding: :raw, sample_rate: 16000
35
- #
36
- # # register callback for when a result is returned
37
- # stream.on_result do |results|
38
- # result = results.first
39
- # puts result.transcript # "how old is the Brooklyn Bridge"
40
- # puts result.confidence # 0.9826789498329163
41
- # end
34
+ # stream = speech.stream encoding: :raw,
35
+ # language: "en-US",
36
+ # sample_rate: 16000
42
37
  #
43
38
  # # Stream 5 seconds of audio from the microphone
44
39
  # # Actual implementation of microphone input varies by platform
@@ -47,6 +42,12 @@ module Google
47
42
  # end
48
43
  #
49
44
  # stream.stop
45
+ # stream.wait_until_complete!
46
+ #
47
+ # results = stream.results
48
+ # result = results.first
49
+ # result.transcript #=> "how old is the Brooklyn Bridge"
50
+ # result.confidence #=> 0.9826789498329163
50
51
  #
51
52
  class Stream
52
53
  include MonitorMixin
@@ -95,14 +96,9 @@ module Google
95
96
  #
96
97
  # audio = speech.audio "path/to/audio.raw"
97
98
  #
98
- # stream = speech.stream encoding: :raw, sample_rate: 16000
99
- #
100
- # # register callback for when a result is returned
101
- # stream.on_result do |results|
102
- # result = results.first
103
- # puts result.transcript # "how old is the Brooklyn Bridge"
104
- # puts result.confidence # 0.9826789498329163
105
- # end
99
+ # stream = speech.stream encoding: :raw,
100
+ # language: "en-US",
101
+ # sample_rate: 16000
106
102
  #
107
103
  # # Stream 5 seconds of audio from the microphone
108
104
  # # Actual implementation of microphone input varies by platform
@@ -111,12 +107,18 @@ module Google
111
107
  # end
112
108
  #
113
109
  # stream.stop
110
+ # stream.wait_until_complete!
111
+ #
112
+ # results = stream.results
113
+ # result = results.first
114
+ # result.transcript #=> "how old is the Brooklyn Bridge"
115
+ # result.confidence #=> 0.9826789498329163
114
116
  #
115
117
  def send bytes
116
118
  start # lazily call start if the stream wasn't started yet
117
119
  # TODO: do not send if stopped?
118
120
  synchronize do
119
- req = V1beta1::StreamingRecognizeRequest.new(
121
+ req = V1::StreamingRecognizeRequest.new(
120
122
  audio_content: bytes.encode("ASCII-8BIT"))
121
123
  @request_queue.push req
122
124
  end
@@ -153,7 +155,9 @@ module Google
153
155
  #
154
156
  # speech = Google::Cloud::Speech.new
155
157
  #
156
- # stream = speech.stream encoding: :raw, sample_rate: 16000
158
+ # stream = speech.stream encoding: :raw,
159
+ # language: "en-US",
160
+ # sample_rate: 16000
157
161
  #
158
162
  # # Stream 5 seconds of audio from the microphone
159
163
  # # Actual implementation of microphone input varies by platform
@@ -176,26 +180,18 @@ module Google
176
180
  end
177
181
 
178
182
  ##
179
- # Register to be notified on the reception of an interim result.
183
+ # Whether all speech recognition results have been returned.
180
184
  #
181
- # @yield [callback] The block for accessing final and interim results.
182
- # @yieldparam [Array<Result>] final_results The final results.
183
- # @yieldparam [Array<Result>] interim_results The interim results.
185
+ # @return [Boolean] All speech recognition results have been returned.
184
186
  #
185
187
  # @example
186
188
  # require "google/cloud/speech"
187
189
  #
188
190
  # speech = Google::Cloud::Speech.new
189
191
  #
190
- # stream = speech.stream encoding: :raw, sample_rate: 16000
191
- #
192
- # # register callback for when an interim result is returned
193
- # stream.on_interim do |final_results, interim_results|
194
- # interim_result = interim_results.first
195
- # puts interim_result.transcript # "how old is the Brooklyn Bridge"
196
- # puts interim_result.confidence # 0.9826789498329163
197
- # puts interim_result.stability # 0.8999
198
- # end
192
+ # stream = speech.stream encoding: :raw,
193
+ # language: "en-US",
194
+ # sample_rate: 16000
199
195
  #
200
196
  # # Stream 5 seconds of audio from the microphone
201
197
  # # Actual implementation of microphone input varies by platform
@@ -205,39 +201,32 @@ module Google
205
201
  #
206
202
  # stream.stop
207
203
  #
208
- def on_interim &block
209
- synchronize do
210
- @callbacks[:interim] << block
211
- end
212
- end
213
-
214
- # @private yields two arguments, all final results and the
215
- # non-final/incomplete result
216
- def interim! interim_results
204
+ # stream.wait_until_complete!
205
+ # stream.complete? #=> true
206
+ #
207
+ # results = stream.results
208
+ # results.each do |result|
209
+ # puts result.transcript
210
+ # puts result.confidence
211
+ # end
212
+ #
213
+ def complete?
217
214
  synchronize do
218
- @callbacks[:interim].each { |c| c.call results, interim_results }
215
+ @complete
219
216
  end
220
217
  end
221
218
 
222
219
  ##
223
- # Register to be notified on the reception of a final result.
224
- #
225
- # @yield [callback] The block for accessing final results.
226
- # @yieldparam [Array<Result>] results The final results.
220
+ # Blocks until all speech recognition results have been returned.
227
221
  #
228
222
  # @example
229
223
  # require "google/cloud/speech"
230
224
  #
231
225
  # speech = Google::Cloud::Speech.new
232
226
  #
233
- # stream = speech.stream encoding: :raw, sample_rate: 16000
234
- #
235
- # # register callback for when an interim result is returned
236
- # stream.on_result do |results|
237
- # result = results.first
238
- # puts result.transcript # "how old is the Brooklyn Bridge"
239
- # puts result.confidence # 0.9826789498329163
240
- # end
227
+ # stream = speech.stream encoding: :raw,
228
+ # language: "en-US",
229
+ # sample_rate: 16000
241
230
  #
242
231
  # # Stream 5 seconds of audio from the microphone
243
232
  # # Actual implementation of microphone input varies by platform
@@ -247,45 +236,46 @@ module Google
247
236
  #
248
237
  # stream.stop
249
238
  #
250
- def on_result &block
251
- synchronize do
252
- @callbacks[:result] << block
253
- end
254
- end
255
-
256
- # @private add a result object, and call the callbacks
257
- def add_result!result_index, result_grpc
258
- synchronize do
259
- @results[result_index] = Result.from_grpc result_grpc
260
- end
261
- # callback for final result received
262
- result!
263
- end
264
-
265
- # @private yields each final results as they are received
266
- def result!
267
- synchronize do
268
- @callbacks[:result].each { |c| c.call results }
239
+ # stream.wait_until_complete!
240
+ # stream.complete? #=> true
241
+ #
242
+ # results = stream.results
243
+ # results.each do |result|
244
+ # puts result.transcript
245
+ # puts result.confidence
246
+ # end
247
+ #
248
+ def wait_until_complete!
249
+ complete_check = nil
250
+ synchronize { complete_check = @complete }
251
+ while complete_check.nil?
252
+ sleep 1
253
+ synchronize { complete_check = @complete }
269
254
  end
270
255
  end
271
256
 
272
257
  ##
273
- # Register to be notified when speech has been detected in the audio
274
- # stream.
258
+ # Register to be notified on the reception of an interim result.
275
259
  #
276
- # @yield [callback] The block to be called when speech has been detected
277
- # in the audio stream.
260
+ # @yield [callback] The block for accessing final and interim results.
261
+ # @yieldparam [Array<Result>] final_results The final results.
262
+ # @yieldparam [Array<Result>] interim_results The interim results.
278
263
  #
279
264
  # @example
280
265
  # require "google/cloud/speech"
281
266
  #
282
267
  # speech = Google::Cloud::Speech.new
283
268
  #
284
- # stream = speech.stream encoding: :raw, sample_rate: 16000
269
+ # stream = speech.stream encoding: :raw,
270
+ # language: "en-US",
271
+ # sample_rate: 16000
285
272
  #
286
- # # register callback for when speech has started.
287
- # stream.on_speech_start do
288
- # puts "Speech has started."
273
+ # # register callback for when an interim result is returned
274
+ # stream.on_interim do |final_results, interim_results|
275
+ # interim_result = interim_results.first
276
+ # puts interim_result.transcript # "how old is the Brooklyn Bridge"
277
+ # puts interim_result.confidence # 0.9826789498329163
278
+ # puts interim_result.stability # 0.8999
289
279
  # end
290
280
  #
291
281
  # # Stream 5 seconds of audio from the microphone
@@ -296,38 +286,35 @@ module Google
296
286
  #
297
287
  # stream.stop
298
288
  #
299
- def on_speech_start &block
289
+ def on_interim &block
300
290
  synchronize do
301
- @callbacks[:speech_start] << block
291
+ @callbacks[:interim] << block
302
292
  end
303
293
  end
304
294
 
305
- # @private returns single final result once :END_OF_UTTERANCE is
306
- # received.
307
- def speech_start!
295
+ ##
296
+ # @private yields two arguments, all final results and the
297
+ # non-final/incomplete result
298
+ def pass_interim! interim_results
308
299
  synchronize do
309
- @callbacks[:speech_start].each(&:call)
300
+ @callbacks[:interim].each { |c| c.call results, interim_results }
310
301
  end
311
302
  end
312
303
 
313
304
  ##
314
- # Register to be notified when speech has ceased to be detected in the
315
- # audio stream.
305
+ # Register to be notified on the reception of a final result.
316
306
  #
317
- # @yield [callback] The block to be called when speech has ceased to be
318
- # detected in the audio stream.
307
+ # @yield [callback] The block for accessing final results.
308
+ # @yieldparam [Array<Result>] results The final results.
319
309
  #
320
310
  # @example
321
311
  # require "google/cloud/speech"
322
312
  #
323
313
  # speech = Google::Cloud::Speech.new
324
314
  #
325
- # stream = speech.stream encoding: :raw, sample_rate: 16000
326
- #
327
- # # register callback for when speech has ended.
328
- # stream.on_speech_end do
329
- # puts "Speech has ended."
330
- # end
315
+ # stream = speech.stream encoding: :raw,
316
+ # language: "en-US",
317
+ # sample_rate: 16000
331
318
  #
332
319
  # # Stream 5 seconds of audio from the microphone
333
320
  # # Actual implementation of microphone input varies by platform
@@ -336,18 +323,25 @@ module Google
336
323
  # end
337
324
  #
338
325
  # stream.stop
326
+ # stream.wait_until_complete!
339
327
  #
340
- def on_speech_end &block
328
+ # results = stream.results
329
+ # result = results.first
330
+ # result.transcript #=> "how old is the Brooklyn Bridge"
331
+ # result.confidence #=> 0.9826789498329163
332
+ #
333
+ def on_result &block
341
334
  synchronize do
342
- @callbacks[:speech_end] << block
335
+ @callbacks[:result] << block
343
336
  end
344
337
  end
345
338
 
346
- # @private yields single final result once :END_OF_UTTERANCE is
347
- # received.
348
- def speech_end!
339
+ ##
340
+ # @private add a result object, and call the callbacks
341
+ def pass_result! result_grpc
349
342
  synchronize do
350
- @callbacks[:speech_end].each(&:call)
343
+ @results << Result.from_grpc(result_grpc)
344
+ @callbacks[:result].each { |c| c.call @results }
351
345
  end
352
346
  end
353
347
 
@@ -363,11 +357,13 @@ module Google
363
357
  #
364
358
  # speech = Google::Cloud::Speech.new
365
359
  #
366
- # stream = speech.stream encoding: :raw, sample_rate: 16000
360
+ # stream = speech.stream encoding: :raw,
361
+ # language: "en-US",
362
+ # sample_rate: 16000
367
363
  #
368
- # # register callback for when audio has ended.
364
+ # # register callback for when stream has ended.
369
365
  # stream.on_complete do
370
- # puts "Audio has ended."
366
+ # puts "Stream has ended."
371
367
  # end
372
368
  #
373
369
  # # Stream 5 seconds of audio from the microphone
@@ -384,11 +380,11 @@ module Google
384
380
  end
385
381
  end
386
382
 
387
- # @private yields all final results once the recognition is completed
388
- # depending on how the Stream is configured, this can be on the
389
- # reception of :END_OF_AUDIO or :END_OF_UTTERANCE.
390
- def complete!
383
+ ##
384
+ # @private yields when the end of the audio stream has been reached.
385
+ def pass_complete!
391
386
  synchronize do
387
+ @complete = true
392
388
  @callbacks[:complete].each(&:call)
393
389
  end
394
390
  end
@@ -409,6 +405,7 @@ module Google
409
405
  # speech = Google::Cloud::Speech.new
410
406
  #
411
407
  # stream = speech.stream encoding: :raw,
408
+ # language: "en-US",
412
409
  # sample_rate: 16000,
413
410
  # utterance: true
414
411
  #
@@ -432,9 +429,10 @@ module Google
432
429
  end
433
430
  end
434
431
 
435
- # @private returns single final result once :END_OF_UTTERANCE is
432
+ ##
433
+ # @private calls the utterance callbacks once :END_OF_SINGLE_UTTERANCE is
436
434
  # received.
437
- def utterance!
435
+ def pass_utterance!
438
436
  synchronize do
439
437
  @callbacks[:utterance].each(&:call)
440
438
  end
@@ -451,7 +449,9 @@ module Google
451
449
  #
452
450
  # speech = Google::Cloud::Speech.new
453
451
  #
454
- # stream = speech.stream encoding: :raw, sample_rate: 16000
452
+ # stream = speech.stream encoding: :raw,
453
+ # language: "en-US",
454
+ # sample_rate: 16000
455
455
  #
456
456
  # # register callback for when an error is returned
457
457
  # stream.on_error do |error|
@@ -487,12 +487,16 @@ module Google
487
487
  response_enum.each do |response|
488
488
  begin
489
489
  background_results response
490
- background_endpointer response.endpointer_type
490
+ background_event_type response.speech_event_type
491
491
  background_error response.error
492
492
  rescue => e
493
493
  error! Google::Cloud::Error.from_error(e)
494
494
  end
495
495
  end
496
+ rescue => e
497
+ error! Google::Cloud::Error.from_error(e)
498
+ ensure
499
+ pass_complete!
496
500
  Thread.pass
497
501
  end
498
502
 
@@ -501,10 +505,9 @@ module Google
501
505
  return unless response.results && response.results.any?
502
506
 
503
507
  final_grpc, interim_grpcs = *response.results
504
- if final_grpc && final_grpc.is_final
505
- add_result! response.result_index, final_grpc
506
- else
508
+ unless final_grpc && final_grpc.is_final
507
509
  # all results are interim
510
+ final_grpc = nil
508
511
  interim_grpcs = response.results
509
512
  end
510
513
 
@@ -512,23 +515,17 @@ module Google
512
515
  interim_results = Array(interim_grpcs).map do |grpc|
513
516
  InterimResult.from_grpc grpc
514
517
  end
518
+
515
519
  # callback for interim results received
516
- interim! interim_results if interim_results.any?
520
+ pass_interim! interim_results if interim_results.any?
521
+ # callback for final results received, if any
522
+ pass_result! final_grpc if final_grpc
517
523
  end
518
524
 
519
- def background_endpointer endpointer
520
- # Handle the endpointer by raising events
521
- if endpointer == :START_OF_SPEECH
522
- speech_start!
523
- elsif endpointer == :END_OF_SPEECH
524
- speech_end!
525
- elsif endpointer == :END_OF_AUDIO
526
- # TODO: do we automatically call stop here?
527
- complete!
528
- elsif endpointer == :END_OF_UTTERANCE
529
- # TODO: do we automatically call stop here?
530
- utterance!
531
- end
525
+ def background_event_type event_type
526
+ # Handle the event_type by raising events
527
+ # TODO: do we automatically call stop here?
528
+ pass_utterance! if event_type == :END_OF_SINGLE_UTTERANCE
532
529
  end
533
530
 
534
531
  def background_error error