google-cloud-speech 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
- require "google/cloud/speech/v1beta1"
16
+ require "google/cloud/speech/v1"
17
17
 
18
18
  module Google
19
19
  module Cloud
@@ -23,9 +23,9 @@ module Google
23
23
  #
24
24
  # A speech recognition result corresponding to a portion of the audio.
25
25
  #
26
- # See {Project#recognize} and {Job#results}.
26
+ # See {Project#recognize} and {Operation#results}.
27
27
  #
28
- # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SpeechRecognitionResult
28
+ # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.SpeechRecognitionResult
29
29
  # SpeechRecognitionResult
30
30
  #
31
31
  # @attr_reader [String] transcript Transcript text representing the words
@@ -46,7 +46,9 @@ module Google
46
46
  # speech = Google::Cloud::Speech.new
47
47
  #
48
48
  # audio = speech.audio "path/to/audio.raw",
49
- # encoding: :raw, sample_rate: 16000
49
+ # encoding: :raw,
50
+ # language: "en-US",
51
+ # sample_rate: 16000
50
52
  # results = audio.recognize
51
53
  #
52
54
  # result = results.first
@@ -94,7 +96,9 @@ module Google
94
96
  # speech = Google::Cloud::Speech.new
95
97
  #
96
98
  # audio = speech.audio "path/to/audio.raw",
97
- # encoding: :raw, sample_rate: 16000
99
+ # encoding: :raw,
100
+ # language: "en-US",
101
+ # sample_rate: 16000
98
102
  # results = audio.recognize
99
103
  #
100
104
  # result = results.first
@@ -124,9 +128,9 @@ module Google
124
128
  #
125
129
  # See {Project#stream} and {Stream#on_interim}.
126
130
  #
127
- # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SpeechRecognitionResult
131
+ # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.SpeechRecognitionResult
128
132
  # SpeechRecognitionResult
129
- # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.StreamingRecognitionResult
133
+ # @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.StreamingRecognitionResult
130
134
  # StreamingRecognitionResult
131
135
  #
132
136
  # @attr_reader [String] transcript Transcript text representing the words
@@ -150,7 +154,9 @@ module Google
150
154
  #
151
155
  # speech = Google::Cloud::Speech.new
152
156
  #
153
- # stream = speech.stream encoding: :raw, sample_rate: 16000
157
+ # stream = speech.stream encoding: :raw,
158
+ # language: "en-US",
159
+ # sample_rate: 16000
154
160
  #
155
161
  # # register callback for when an interim result is returned
156
162
  # stream.on_interim do |final_results, interim_results|
@@ -16,7 +16,7 @@
16
16
  require "google/cloud/errors"
17
17
  require "google/cloud/speech/credentials"
18
18
  require "google/cloud/speech/version"
19
- require "google/cloud/speech/v1beta1"
19
+ require "google/cloud/speech/v1"
20
20
 
21
21
  module Google
22
22
  module Cloud
@@ -33,7 +33,7 @@ module Google
33
33
  client_config: nil
34
34
  @project = project
35
35
  @credentials = credentials
36
- @host = host || V1beta1::SpeechClient::SERVICE_ADDRESS
36
+ @host = host || V1::SpeechClient::SERVICE_ADDRESS
37
37
  @timeout = timeout
38
38
  @client_config = client_config || {}
39
39
  end
@@ -53,7 +53,7 @@ module Google
53
53
  def service
54
54
  return mocked_service if mocked_service
55
55
  @service ||= \
56
- V1beta1::SpeechClient.new(
56
+ V1::SpeechClient.new(
57
57
  service_path: host,
58
58
  channel: channel,
59
59
  timeout: timeout,
@@ -82,13 +82,14 @@ module Google
82
82
 
83
83
  def recognize_sync audio, config
84
84
  execute do
85
- service.sync_recognize config, audio, options: default_options
85
+ service.recognize config, audio, options: default_options
86
86
  end
87
87
  end
88
88
 
89
89
  def recognize_async audio, config
90
90
  execute do
91
- service.async_recognize config, audio, options: default_options
91
+ service.long_running_recognize \
92
+ config, audio, options: default_options
92
93
  end
93
94
  end
94
95
 
@@ -98,7 +99,12 @@ module Google
98
99
  end
99
100
 
100
101
  def get_op name
101
- execute { ops.get_operation name }
102
+ execute do
103
+ Google::Gax::Operation.new \
104
+ ops.get_operation(name), ops,
105
+ V1::LongRunningRecognizeResponse,
106
+ V1::LongRunningRecognizeMetadata
107
+ end
102
108
  end
103
109
 
104
110
  def inspect
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
- require "google/cloud/speech/v1beta1"
16
+ require "google/cloud/speech/v1"
17
17
  require "google/cloud/speech/result"
18
18
  require "monitor"
19
19
  require "forwardable"
@@ -31,14 +31,9 @@ module Google
31
31
  #
32
32
  # speech = Google::Cloud::Speech.new
33
33
  #
34
- # stream = speech.stream encoding: :raw, sample_rate: 16000
35
- #
36
- # # register callback for when a result is returned
37
- # stream.on_result do |results|
38
- # result = results.first
39
- # puts result.transcript # "how old is the Brooklyn Bridge"
40
- # puts result.confidence # 0.9826789498329163
41
- # end
34
+ # stream = speech.stream encoding: :raw,
35
+ # language: "en-US",
36
+ # sample_rate: 16000
42
37
  #
43
38
  # # Stream 5 seconds of audio from the microphone
44
39
  # # Actual implementation of microphone input varies by platform
@@ -47,6 +42,12 @@ module Google
47
42
  # end
48
43
  #
49
44
  # stream.stop
45
+ # stream.wait_until_complete!
46
+ #
47
+ # results = stream.results
48
+ # result = results.first
49
+ # result.transcript #=> "how old is the Brooklyn Bridge"
50
+ # result.confidence #=> 0.9826789498329163
50
51
  #
51
52
  class Stream
52
53
  include MonitorMixin
@@ -95,14 +96,9 @@ module Google
95
96
  #
96
97
  # audio = speech.audio "path/to/audio.raw"
97
98
  #
98
- # stream = speech.stream encoding: :raw, sample_rate: 16000
99
- #
100
- # # register callback for when a result is returned
101
- # stream.on_result do |results|
102
- # result = results.first
103
- # puts result.transcript # "how old is the Brooklyn Bridge"
104
- # puts result.confidence # 0.9826789498329163
105
- # end
99
+ # stream = speech.stream encoding: :raw,
100
+ # language: "en-US",
101
+ # sample_rate: 16000
106
102
  #
107
103
  # # Stream 5 seconds of audio from the microphone
108
104
  # # Actual implementation of microphone input varies by platform
@@ -111,12 +107,18 @@ module Google
111
107
  # end
112
108
  #
113
109
  # stream.stop
110
+ # stream.wait_until_complete!
111
+ #
112
+ # results = stream.results
113
+ # result = results.first
114
+ # result.transcript #=> "how old is the Brooklyn Bridge"
115
+ # result.confidence #=> 0.9826789498329163
114
116
  #
115
117
  def send bytes
116
118
  start # lazily call start if the stream wasn't started yet
117
119
  # TODO: do not send if stopped?
118
120
  synchronize do
119
- req = V1beta1::StreamingRecognizeRequest.new(
121
+ req = V1::StreamingRecognizeRequest.new(
120
122
  audio_content: bytes.encode("ASCII-8BIT"))
121
123
  @request_queue.push req
122
124
  end
@@ -153,7 +155,9 @@ module Google
153
155
  #
154
156
  # speech = Google::Cloud::Speech.new
155
157
  #
156
- # stream = speech.stream encoding: :raw, sample_rate: 16000
158
+ # stream = speech.stream encoding: :raw,
159
+ # language: "en-US",
160
+ # sample_rate: 16000
157
161
  #
158
162
  # # Stream 5 seconds of audio from the microphone
159
163
  # # Actual implementation of microphone input varies by platform
@@ -176,26 +180,18 @@ module Google
176
180
  end
177
181
 
178
182
  ##
179
- # Register to be notified on the reception of an interim result.
183
+ # Whether all speech recognition results have been returned.
180
184
  #
181
- # @yield [callback] The block for accessing final and interim results.
182
- # @yieldparam [Array<Result>] final_results The final results.
183
- # @yieldparam [Array<Result>] interim_results The interim results.
185
+ # @return [Boolean] All speech recognition results have been returned.
184
186
  #
185
187
  # @example
186
188
  # require "google/cloud/speech"
187
189
  #
188
190
  # speech = Google::Cloud::Speech.new
189
191
  #
190
- # stream = speech.stream encoding: :raw, sample_rate: 16000
191
- #
192
- # # register callback for when an interim result is returned
193
- # stream.on_interim do |final_results, interim_results|
194
- # interim_result = interim_results.first
195
- # puts interim_result.transcript # "how old is the Brooklyn Bridge"
196
- # puts interim_result.confidence # 0.9826789498329163
197
- # puts interim_result.stability # 0.8999
198
- # end
192
+ # stream = speech.stream encoding: :raw,
193
+ # language: "en-US",
194
+ # sample_rate: 16000
199
195
  #
200
196
  # # Stream 5 seconds of audio from the microphone
201
197
  # # Actual implementation of microphone input varies by platform
@@ -205,39 +201,32 @@ module Google
205
201
  #
206
202
  # stream.stop
207
203
  #
208
- def on_interim &block
209
- synchronize do
210
- @callbacks[:interim] << block
211
- end
212
- end
213
-
214
- # @private yields two arguments, all final results and the
215
- # non-final/incomplete result
216
- def interim! interim_results
204
+ # stream.wait_until_complete!
205
+ # stream.complete? #=> true
206
+ #
207
+ # results = stream.results
208
+ # results.each do |result|
209
+ # puts result.transcript
210
+ # puts result.confidence
211
+ # end
212
+ #
213
+ def complete?
217
214
  synchronize do
218
- @callbacks[:interim].each { |c| c.call results, interim_results }
215
+ @complete
219
216
  end
220
217
  end
221
218
 
222
219
  ##
223
- # Register to be notified on the reception of a final result.
224
- #
225
- # @yield [callback] The block for accessing final results.
226
- # @yieldparam [Array<Result>] results The final results.
220
+ # Blocks until all speech recognition results have been returned.
227
221
  #
228
222
  # @example
229
223
  # require "google/cloud/speech"
230
224
  #
231
225
  # speech = Google::Cloud::Speech.new
232
226
  #
233
- # stream = speech.stream encoding: :raw, sample_rate: 16000
234
- #
235
- # # register callback for when an interim result is returned
236
- # stream.on_result do |results|
237
- # result = results.first
238
- # puts result.transcript # "how old is the Brooklyn Bridge"
239
- # puts result.confidence # 0.9826789498329163
240
- # end
227
+ # stream = speech.stream encoding: :raw,
228
+ # language: "en-US",
229
+ # sample_rate: 16000
241
230
  #
242
231
  # # Stream 5 seconds of audio from the microphone
243
232
  # # Actual implementation of microphone input varies by platform
@@ -247,45 +236,46 @@ module Google
247
236
  #
248
237
  # stream.stop
249
238
  #
250
- def on_result &block
251
- synchronize do
252
- @callbacks[:result] << block
253
- end
254
- end
255
-
256
- # @private add a result object, and call the callbacks
257
- def add_result!result_index, result_grpc
258
- synchronize do
259
- @results[result_index] = Result.from_grpc result_grpc
260
- end
261
- # callback for final result received
262
- result!
263
- end
264
-
265
- # @private yields each final results as they are received
266
- def result!
267
- synchronize do
268
- @callbacks[:result].each { |c| c.call results }
239
+ # stream.wait_until_complete!
240
+ # stream.complete? #=> true
241
+ #
242
+ # results = stream.results
243
+ # results.each do |result|
244
+ # puts result.transcript
245
+ # puts result.confidence
246
+ # end
247
+ #
248
+ def wait_until_complete!
249
+ complete_check = nil
250
+ synchronize { complete_check = @complete }
251
+ while complete_check.nil?
252
+ sleep 1
253
+ synchronize { complete_check = @complete }
269
254
  end
270
255
  end
271
256
 
272
257
  ##
273
- # Register to be notified when speech has been detected in the audio
274
- # stream.
258
+ # Register to be notified on the reception of an interim result.
275
259
  #
276
- # @yield [callback] The block to be called when speech has been detected
277
- # in the audio stream.
260
+ # @yield [callback] The block for accessing final and interim results.
261
+ # @yieldparam [Array<Result>] final_results The final results.
262
+ # @yieldparam [Array<Result>] interim_results The interim results.
278
263
  #
279
264
  # @example
280
265
  # require "google/cloud/speech"
281
266
  #
282
267
  # speech = Google::Cloud::Speech.new
283
268
  #
284
- # stream = speech.stream encoding: :raw, sample_rate: 16000
269
+ # stream = speech.stream encoding: :raw,
270
+ # language: "en-US",
271
+ # sample_rate: 16000
285
272
  #
286
- # # register callback for when speech has started.
287
- # stream.on_speech_start do
288
- # puts "Speech has started."
273
+ # # register callback for when an interim result is returned
274
+ # stream.on_interim do |final_results, interim_results|
275
+ # interim_result = interim_results.first
276
+ # puts interim_result.transcript # "how old is the Brooklyn Bridge"
277
+ # puts interim_result.confidence # 0.9826789498329163
278
+ # puts interim_result.stability # 0.8999
289
279
  # end
290
280
  #
291
281
  # # Stream 5 seconds of audio from the microphone
@@ -296,38 +286,35 @@ module Google
296
286
  #
297
287
  # stream.stop
298
288
  #
299
- def on_speech_start &block
289
+ def on_interim &block
300
290
  synchronize do
301
- @callbacks[:speech_start] << block
291
+ @callbacks[:interim] << block
302
292
  end
303
293
  end
304
294
 
305
- # @private returns single final result once :END_OF_UTTERANCE is
306
- # received.
307
- def speech_start!
295
+ ##
296
+ # @private yields two arguments, all final results and the
297
+ # non-final/incomplete result
298
+ def pass_interim! interim_results
308
299
  synchronize do
309
- @callbacks[:speech_start].each(&:call)
300
+ @callbacks[:interim].each { |c| c.call results, interim_results }
310
301
  end
311
302
  end
312
303
 
313
304
  ##
314
- # Register to be notified when speech has ceased to be detected in the
315
- # audio stream.
305
+ # Register to be notified on the reception of a final result.
316
306
  #
317
- # @yield [callback] The block to be called when speech has ceased to be
318
- # detected in the audio stream.
307
+ # @yield [callback] The block for accessing final results.
308
+ # @yieldparam [Array<Result>] results The final results.
319
309
  #
320
310
  # @example
321
311
  # require "google/cloud/speech"
322
312
  #
323
313
  # speech = Google::Cloud::Speech.new
324
314
  #
325
- # stream = speech.stream encoding: :raw, sample_rate: 16000
326
- #
327
- # # register callback for when speech has ended.
328
- # stream.on_speech_end do
329
- # puts "Speech has ended."
330
- # end
315
+ # stream = speech.stream encoding: :raw,
316
+ # language: "en-US",
317
+ # sample_rate: 16000
331
318
  #
332
319
  # # Stream 5 seconds of audio from the microphone
333
320
  # # Actual implementation of microphone input varies by platform
@@ -336,18 +323,25 @@ module Google
336
323
  # end
337
324
  #
338
325
  # stream.stop
326
+ # stream.wait_until_complete!
339
327
  #
340
- def on_speech_end &block
328
+ # results = stream.results
329
+ # result = results.first
330
+ # result.transcript #=> "how old is the Brooklyn Bridge"
331
+ # result.confidence #=> 0.9826789498329163
332
+ #
333
+ def on_result &block
341
334
  synchronize do
342
- @callbacks[:speech_end] << block
335
+ @callbacks[:result] << block
343
336
  end
344
337
  end
345
338
 
346
- # @private yields single final result once :END_OF_UTTERANCE is
347
- # received.
348
- def speech_end!
339
+ ##
340
+ # @private add a result object, and call the callbacks
341
+ def pass_result! result_grpc
349
342
  synchronize do
350
- @callbacks[:speech_end].each(&:call)
343
+ @results << Result.from_grpc(result_grpc)
344
+ @callbacks[:result].each { |c| c.call @results }
351
345
  end
352
346
  end
353
347
 
@@ -363,11 +357,13 @@ module Google
363
357
  #
364
358
  # speech = Google::Cloud::Speech.new
365
359
  #
366
- # stream = speech.stream encoding: :raw, sample_rate: 16000
360
+ # stream = speech.stream encoding: :raw,
361
+ # language: "en-US",
362
+ # sample_rate: 16000
367
363
  #
368
- # # register callback for when audio has ended.
364
+ # # register callback for when stream has ended.
369
365
  # stream.on_complete do
370
- # puts "Audio has ended."
366
+ # puts "Stream has ended."
371
367
  # end
372
368
  #
373
369
  # # Stream 5 seconds of audio from the microphone
@@ -384,11 +380,11 @@ module Google
384
380
  end
385
381
  end
386
382
 
387
- # @private yields all final results once the recognition is completed
388
- # depending on how the Stream is configured, this can be on the
389
- # reception of :END_OF_AUDIO or :END_OF_UTTERANCE.
390
- def complete!
383
+ ##
384
+ # @private yields when the end of the audio stream has been reached.
385
+ def pass_complete!
391
386
  synchronize do
387
+ @complete = true
392
388
  @callbacks[:complete].each(&:call)
393
389
  end
394
390
  end
@@ -409,6 +405,7 @@ module Google
409
405
  # speech = Google::Cloud::Speech.new
410
406
  #
411
407
  # stream = speech.stream encoding: :raw,
408
+ # language: "en-US",
412
409
  # sample_rate: 16000,
413
410
  # utterance: true
414
411
  #
@@ -432,9 +429,10 @@ module Google
432
429
  end
433
430
  end
434
431
 
435
- # @private returns single final result once :END_OF_UTTERANCE is
432
+ ##
433
+ # @private calls the utterance callbacks once :END_OF_SINGLE_UTTERANCE is
436
434
  # received.
437
- def utterance!
435
+ def pass_utterance!
438
436
  synchronize do
439
437
  @callbacks[:utterance].each(&:call)
440
438
  end
@@ -451,7 +449,9 @@ module Google
451
449
  #
452
450
  # speech = Google::Cloud::Speech.new
453
451
  #
454
- # stream = speech.stream encoding: :raw, sample_rate: 16000
452
+ # stream = speech.stream encoding: :raw,
453
+ # language: "en-US",
454
+ # sample_rate: 16000
455
455
  #
456
456
  # # register callback for when an error is returned
457
457
  # stream.on_error do |error|
@@ -487,12 +487,16 @@ module Google
487
487
  response_enum.each do |response|
488
488
  begin
489
489
  background_results response
490
- background_endpointer response.endpointer_type
490
+ background_event_type response.speech_event_type
491
491
  background_error response.error
492
492
  rescue => e
493
493
  error! Google::Cloud::Error.from_error(e)
494
494
  end
495
495
  end
496
+ rescue => e
497
+ error! Google::Cloud::Error.from_error(e)
498
+ ensure
499
+ pass_complete!
496
500
  Thread.pass
497
501
  end
498
502
 
@@ -501,10 +505,9 @@ module Google
501
505
  return unless response.results && response.results.any?
502
506
 
503
507
  final_grpc, interim_grpcs = *response.results
504
- if final_grpc && final_grpc.is_final
505
- add_result! response.result_index, final_grpc
506
- else
508
+ unless final_grpc && final_grpc.is_final
507
509
  # all results are interim
510
+ final_grpc = nil
508
511
  interim_grpcs = response.results
509
512
  end
510
513
 
@@ -512,23 +515,17 @@ module Google
512
515
  interim_results = Array(interim_grpcs).map do |grpc|
513
516
  InterimResult.from_grpc grpc
514
517
  end
518
+
515
519
  # callback for interim results received
516
- interim! interim_results if interim_results.any?
520
+ pass_interim! interim_results if interim_results.any?
521
+ # callback for final results received, if any
522
+ pass_result! final_grpc if final_grpc
517
523
  end
518
524
 
519
- def background_endpointer endpointer
520
- # Handle the endpointer by raising events
521
- if endpointer == :START_OF_SPEECH
522
- speech_start!
523
- elsif endpointer == :END_OF_SPEECH
524
- speech_end!
525
- elsif endpointer == :END_OF_AUDIO
526
- # TODO: do we automatically call stop here?
527
- complete!
528
- elsif endpointer == :END_OF_UTTERANCE
529
- # TODO: do we automatically call stop here?
530
- utterance!
531
- end
525
+ def background_event_type event_type
526
+ # Handle the event_type by raising events
527
+ # TODO: do we automatically call stop here?
528
+ pass_utterance! if event_type == :END_OF_SINGLE_UTTERANCE
532
529
  end
533
530
 
534
531
  def background_error error