google-cloud-speech 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -1
  3. data/LICENSE +1 -1
  4. data/README.md +69 -43
  5. data/lib/google/cloud/speech.rb +94 -252
  6. data/lib/google/cloud/speech/v1.rb +11 -1
  7. data/lib/google/cloud/speech/v1/cloud_speech_services_pb.rb +1 -1
  8. data/lib/google/cloud/speech/{version.rb → v1/credentials.rb} +12 -2
  9. data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +2 -2
  10. data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +92 -0
  11. data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +1 -1
  12. data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +1 -1
  13. data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +1 -1
  14. data/lib/google/cloud/speech/v1/doc/overview.rb +1 -1
  15. data/lib/google/cloud/speech/v1/helpers.rb +93 -0
  16. data/lib/google/cloud/speech/v1/speech_client.rb +26 -49
  17. data/lib/google/cloud/speech/v1/speech_client_config.json +5 -5
  18. data/lib/google/cloud/speech/v1/stream.rb +614 -0
  19. data/lib/google/cloud/speech/v1p1beta1.rb +126 -0
  20. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_pb.rb +175 -0
  21. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_services_pb.rb +54 -0
  22. data/lib/google/cloud/speech/v1p1beta1/credentials.rb +32 -0
  23. data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +625 -0
  24. data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +92 -0
  25. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +124 -0
  26. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +90 -0
  27. data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +83 -0
  28. data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +73 -0
  29. data/lib/google/cloud/speech/v1p1beta1/helpers.rb +93 -0
  30. data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +322 -0
  31. data/lib/google/cloud/speech/v1p1beta1/speech_client_config.json +41 -0
  32. data/lib/google/cloud/speech/v1p1beta1/stream.rb +614 -0
  33. metadata +29 -120
  34. data/lib/google-cloud-speech.rb +0 -142
  35. data/lib/google/cloud/speech/audio.rb +0 -330
  36. data/lib/google/cloud/speech/convert.rb +0 -46
  37. data/lib/google/cloud/speech/credentials.rb +0 -57
  38. data/lib/google/cloud/speech/operation.rb +0 -262
  39. data/lib/google/cloud/speech/project.rb +0 -651
  40. data/lib/google/cloud/speech/result.rb +0 -240
  41. data/lib/google/cloud/speech/service.rb +0 -121
  42. data/lib/google/cloud/speech/stream.rb +0 -564
@@ -0,0 +1,614 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/speech/v1p1beta1"
17
+ require "monitor"
18
+ require "forwardable"
19
+
20
+ module Google
21
+ module Cloud
22
+ module Speech
23
+ module V1p1beta1
24
+ ##
25
+ # # Stream
26
+ #
27
+ # A resource that represents the streaming requests and responses.
28
+ #
29
+ # @example
30
+ # require "google/cloud/speech"
31
+ #
32
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
33
+ # streaming_config = {
34
+ # config: {
35
+ # encoding: :linear16,
36
+ # language_code: "en-US",
37
+ # sample_rate_hertz: 16000
38
+ # }
39
+ # }
40
+ # stream = speech_client.streaming_recognize(streaming_config)
41
+ #
42
+ # # Stream 5 seconds of audio from the microphone
43
+ # # Actual implementation of microphone input varies by platform
44
+ # 5.times do
45
+ # stream.send MicrophoneInput.read(32000)
46
+ # end
47
+ #
48
+ # stream.stop
49
+ # stream.wait_until_complete!
50
+ #
51
+ # results = stream.results
52
+ # result = results.first.alternatives.first
53
+ # result.transcript #=> "how old is the Brooklyn Bridge"
54
+ # result.confidence #=> 0.9826789498329163
55
+ #
56
+ class Stream
57
+ include MonitorMixin
58
+ ##
59
+ # @private Creates a new Speech Stream instance.
60
+ # This must always be private, since it may change as the implementation
61
+ # changes over time.
62
def initialize streaming_config, streaming_call
  # The opening request carries only the streaming configuration; audio
  # chunks are queued as separate requests by #send.
  @streaming_recognize_request = { streaming_config: streaming_config }
  @streaming_call = streaming_call
  # Final results are appended here as they arrive.
  @results = []
  # Callback lists keyed by event name; every key lazily gets its own
  # array the first time it is looked up.
  @callbacks = Hash.new { |registry, event| registry[event] = [] }
  super() # MonitorMixin must run its own initializer
end
71
+
72
+ ##
73
+ # Starts the stream. The stream will be started in the first #send call.
74
def start
  # The check-and-create sequence runs under the monitor. Without it, two
  # threads racing through their first #send could both see a missing
  # queue, each build one, and each spawn a background thread. The monitor
  # is reentrant, so callers that already hold it are unaffected.
  synchronize do
    # Already started: nothing to do (returns nil).
    return if @request_queue

    # The stream itself serves as the sentinel that #stop later pushes to
    # terminate the request enumeration.
    @request_queue = EnumeratorQueue.new(self)
    # The configuration request must be the first item on the queue.
    @request_queue.push @streaming_recognize_request

    # Responses are consumed on a separate thread; the Thread is the
    # return value of the first call, as before.
    Thread.new { background_run }
  end
end
81
+
82
+ ##
83
+ # Checks if the stream has been started.
84
+ #
85
+ # @return [Boolean] `true` when started, `false` otherwise.
86
def started?
  # The request queue only exists once #start has run, so its presence is
  # the "started" flag. Always answers with a strict true/false.
  synchronize { @request_queue ? true : false }
end
91
+
92
+ ##
93
+ # Sends audio content to the server.
94
+ #
95
+ # @param [String] bytes A string of binary audio data to be recognized.
96
+ # The data should be encoded as `ASCII-8BIT`.
97
+ #
98
+ # @example
99
+ # require "google/cloud/speech"
100
+ #
101
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
102
+ # streaming_config = {
103
+ # config: {
104
+ # encoding: :linear16,
105
+ # language_code: "en-US",
106
+ # sample_rate_hertz: 16000
107
+ # }
108
+ # }
109
+ # stream = speech_client.streaming_recognize(streaming_config)
110
+ #
111
+ # # Stream 5 seconds of audio from the microphone
112
+ # # Actual implementation of microphone input varies by platform
113
+ # 5.times do
114
+ # stream.send MicrophoneInput.read(32000)
115
+ # end
116
+ #
117
+ # stream.stop
118
+ # stream.wait_until_complete!
119
+ #
120
+ # results = stream.results
121
+ # result = results.first.alternatives.first
122
+ # result.transcript #=> "how old is the Brooklyn Bridge"
123
+ # result.confidence #=> 0.9826789498329163
124
+ #
125
def send bytes
  start # lazily call start if the stream wasn't started yet
  # TODO: do not send if stopped?
  synchronize do
    # String#b returns a copy tagged ASCII-8BIT without transcoding. The
    # previous String#encode call attempted a real conversion and raised
    # for any non-ASCII byte when the input was tagged with a text
    # encoding (for example audio read from a file in text mode). The
    # caller's string is left untouched.
    req = V1p1beta1::StreamingRecognizeRequest.new(
      audio_content: bytes.b
    )
    @request_queue.push req
  end
end
135
+
136
+ ##
137
+ # Stops the stream. Signals to the server that no more data will be
138
+ # sent.
139
def stop
  synchronize do
    # A stream that was never started has no queue and nothing to signal.
    unless @request_queue.nil?
      # The stream is its own queue sentinel (see #start); pushing it ends
      # the request enumeration consumed by the streaming call.
      @request_queue.push self
      @stopped = true
    end
  end
end
146
+
147
+ ##
148
+ # Checks if the stream has been stopped.
149
+ #
150
+ # @return [Boolean] `true` when stopped, `false` otherwise.
151
def stopped?
  synchronize do
    # @stopped is never assigned before #stop runs, so coerce the unset
    # (nil) state to the documented `false` instead of leaking nil.
    @stopped ? true : false
  end
end
156
+
157
+ ##
158
+ # The speech recognition results for the audio.
159
+ #
160
+ # @return [Array<Result>] The transcribed text of audio recognized.
161
+ #
162
+ # @example
163
+ # require "google/cloud/speech"
164
+ #
165
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
166
+ # streaming_config = {
167
+ # config: {
168
+ # encoding: :linear16,
169
+ # language_code: "en-US",
170
+ # sample_rate_hertz: 16000
171
+ # }
172
+ # }
173
+ # stream = speech_client.streaming_recognize(streaming_config)
174
+ #
175
+ # # Stream 5 seconds of audio from the microphone
176
+ # # Actual implementation of microphone input varies by platform
177
+ # 5.times do
178
+ # stream.send MicrophoneInput.read(32000)
179
+ # end
180
+ #
181
+ # stream.stop
182
+ #
183
+ # results = stream.results
184
+ # results.each do |result|
185
+ # result.alternatives.each do |alternative|
186
+ # puts alternative.transcript
187
+ # puts alternative.confidence
188
+ # end
189
+ # end
190
+ #
191
def results
  # Hands back the live array of final results (not a copy); the monitor
  # only guards the read of the reference itself.
  synchronize { @results }
end
196
+
197
+ ##
198
+ # Whether all speech recognition results have been returned.
199
+ #
200
+ # @return [Boolean] All speech recognition results have been returned.
201
+ #
202
+ # @example
203
+ # require "google/cloud/speech"
204
+ #
205
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
206
+ # streaming_config = {
207
+ # config: {
208
+ # encoding: :linear16,
209
+ # language_code: "en-US",
210
+ # sample_rate_hertz: 16000
211
+ # }
212
+ # }
213
+ # stream = speech_client.streaming_recognize(streaming_config)
214
+ #
215
+ # # Stream 5 seconds of audio from the microphone
216
+ # # Actual implementation of microphone input varies by platform
217
+ # 5.times do
218
+ # stream.send MicrophoneInput.read(32000)
219
+ # end
220
+ #
221
+ # stream.stop
222
+ #
223
+ # stream.wait_until_complete!
224
+ # stream.complete? #=> true
225
+ #
226
+ # results = stream.results
227
+ # results.each do |result|
228
+ # result.alternatives.each do |alternative|
229
+ # puts alternative.transcript
230
+ # puts alternative.confidence
231
+ # end
232
+ # end
233
+ #
234
def complete?
  synchronize do
    # @complete stays nil until #pass_complete! sets it; report that state
    # as the documented Boolean `false` rather than nil.
    @complete ? true : false
  end
end
239
+
240
+ ##
241
+ # Blocks until all speech recognition results have been returned.
242
+ #
243
+ # @example
244
+ # require "google/cloud/speech"
245
+ #
246
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
247
+ # streaming_config = {
248
+ # config: {
249
+ # encoding: :linear16,
250
+ # language_code: "en-US",
251
+ # sample_rate_hertz: 16000
252
+ # }
253
+ # }
254
+ # stream = speech_client.streaming_recognize(streaming_config)
255
+ #
256
+ # # Stream 5 seconds of audio from the microphone
257
+ # # Actual implementation of microphone input varies by platform
258
+ # 5.times do
259
+ # stream.send MicrophoneInput.read(32000)
260
+ # end
261
+ #
262
+ # stream.stop
263
+ #
264
+ # stream.wait_until_complete!
265
+ # stream.complete? #=> true
266
+ #
267
+ # results = stream.results
268
+ # results.each do |result|
269
+ # result.alternatives.each do |alternative|
270
+ # puts alternative.transcript
271
+ # puts alternative.confidence
272
+ # end
273
+ # end
274
+ #
275
def wait_until_complete!
  # Poll the completion flag once per second until it has been set. The
  # flag is read under the monitor, but the sleep happens outside it so
  # the background thread is never blocked by a waiting caller.
  loop do
    finished = synchronize { @complete }
    break unless finished.nil?
    sleep 1
  end
end
283
+
284
+ ##
285
+ # Register to be notified on the reception of an interim result.
286
+ #
287
+ # @yield [callback] The block for accessing final and interim results.
288
+ # @yieldparam [Array<Result>] final_results The final results.
289
+ # @yieldparam [Array<Result>] interim_results The interim results.
290
+ #
291
+ # @example
292
+ # require "google/cloud/speech"
293
+ #
294
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
295
+ # streaming_config = {
296
+ # config: {
297
+ # encoding: :linear16,
298
+ # language_code: "en-US",
299
+ # sample_rate_hertz: 16000
300
+ # },
301
+ # interim_results: true
302
+ # }
303
+ # stream = speech_client.streaming_recognize(streaming_config)
304
+ #
305
+ # # register callback for when an interim result is returned
306
+ # stream.on_interim do |final_results, interim_results|
307
+ # interim_result = interim_results.first
308
+ # interim_alternative = interim_result.alternatives.first
309
+ # puts interim_alternative.transcript # "how old is the Brooklyn Bridge"
310
+ # puts interim_alternative.confidence # 0.9826789498329163
311
+ # puts interim_result.stability # 0.8999
312
+ # end
313
+ #
314
+ # # Stream 5 seconds of audio from the microphone
315
+ # # Actual implementation of microphone input varies by platform
316
+ # 5.times do
317
+ # stream.send MicrophoneInput.read(32000)
318
+ # end
319
+ #
320
+ # stream.stop
321
+ #
322
def on_interim &block
  # Append the handler to the :interim list; handlers are invoked in
  # registration order by #pass_interim!.
  synchronize { @callbacks[:interim].push block }
end
327
+
328
+ ##
329
+ # @private yields two arguments, all final results and the
330
+ # non-final/incomplete result
331
def pass_interim! interim_results
  synchronize do
    @callbacks[:interim].each do |callback|
      # First argument: the final results collected so far; second: the
      # interim results from the current response.
      callback.call results, interim_results
    end
  end
end
336
+
337
+ ##
338
+ # Register to be notified on the reception of a final result.
339
+ #
340
+ # @yield [callback] The block for accessing final results.
341
+ # @yieldparam [Array<Result>] results The final results.
342
+ #
343
+ # @example
344
+ # require "google/cloud/speech"
345
+ #
346
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
347
+ # streaming_config = {
348
+ # config: {
349
+ # encoding: :linear16,
350
+ # language_code: "en-US",
351
+ # sample_rate_hertz: 16000
352
+ # }
353
+ # }
354
+ # stream = speech_client.streaming_recognize(streaming_config)
355
+ #
356
+ # # register callback for when a final result has been received
357
+ # stream.on_result do |results|
358
+ # results.each do |result|
359
+ # puts result.alternatives.first.transcript
360
+ # end
361
+ # end
362
+ #
363
+ # # Stream 5 seconds of audio from the microphone
364
+ # # Actual implementation of microphone input varies by platform
365
+ # 5.times do
366
+ # stream.send MicrophoneInput.read(32000)
367
+ # end
368
+ #
369
+ # stream.stop
370
+ # stream.wait_until_complete!
371
+ #
372
+ # results = stream.results
373
+ # result = results.first.alternatives.first
374
+ # result.transcript #=> "how old is the Brooklyn Bridge"
375
+ # result.confidence #=> 0.9826789498329163
376
+ #
377
def on_result &block
  # Append the handler to the :result list; handlers are invoked in
  # registration order by #pass_result!.
  synchronize { @callbacks[:result].push block }
end
382
+
383
+ ##
384
+ # @private add a result object, and call the callbacks
385
def pass_result! result_grpc
  synchronize do
    # Record the new final result first so that every callback sees it in
    # the accumulated list it receives.
    @results.push result_grpc
    @callbacks[:result].each do |callback|
      callback.call @results
    end
  end
end
391
+
392
+ ##
393
+ # Register to be notified when the end of the audio stream has been
394
+ # reached.
395
+ #
396
+ # @yield [callback] The block to be called when the end of the audio
397
+ # stream has been reached.
398
+ #
399
+ # @example
400
+ # require "google/cloud/speech"
401
+ #
402
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
403
+ # streaming_config = {
404
+ # config: {
405
+ # encoding: :linear16,
406
+ # language_code: "en-US",
407
+ # sample_rate_hertz: 16000
408
+ # }
409
+ # }
410
+ # stream = speech_client.streaming_recognize(streaming_config)
411
+ #
412
+ # # register callback for when stream has ended.
413
+ # stream.on_complete do
414
+ # puts "Stream has ended."
415
+ # end
416
+ #
417
+ # # Stream 5 seconds of audio from the microphone
418
+ # # Actual implementation of microphone input varies by platform
419
+ # 5.times do
420
+ # stream.send MicrophoneInput.read(32000)
421
+ # end
422
+ #
423
+ # stream.stop
424
+ #
425
def on_complete &block
  # Append the handler to the :complete list; handlers are invoked in
  # registration order by #pass_complete!.
  synchronize { @callbacks[:complete].push block }
end
430
+
431
+ ##
432
+ # @private yields when the end of the audio stream has been reached.
433
def pass_complete!
  synchronize do
    # Flip the flag before notifying, so #complete? already answers true
    # from inside a completion callback.
    @complete = true
    @callbacks[:complete].each { |callback| callback.call }
  end
end
439
+
440
+ ##
441
+ # Register to be notified when the server has detected the end of the
442
+ # user's speech utterance and expects no additional speech. Therefore,
443
+ # the server will not process additional audio. The client should stop
444
+ # sending additional audio data. This event only occurs when
445
+ # `single_utterance` is `true`.
446
+ #
447
+ # @yield [callback] The block to be called when the end of the user's
448
+ # speech utterance has been detected.
449
+ #
450
+ # @example
451
+ # require "google/cloud/speech"
452
+ #
453
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
454
+ # streaming_config = {
455
+ # config: {
456
+ # encoding: :linear16,
457
+ # language_code: "en-US",
458
+ # sample_rate_hertz: 16000
459
+ # },
460
+ # single_utterance: true
461
+ # }
462
+ # stream = speech_client.streaming_recognize(streaming_config)
463
+ #
464
+ # # register callback for when utterance has occurred.
465
+ # stream.on_utterance do
466
+ # puts "Utterance has occurred."
467
+ # stream.stop
468
+ # end
469
+ #
470
+ # # Stream 5 seconds of audio from the microphone
471
+ # # Actual implementation of microphone input varies by platform
472
+ # 5.times do
473
+ # stream.send MicrophoneInput.read(32000)
474
+ # end
475
+ #
476
+ # stream.stop unless stream.stopped?
477
+ #
478
def on_utterance &block
  # Append the handler to the :utterance list; handlers are invoked in
  # registration order by #pass_utterance!.
  synchronize { @callbacks[:utterance].push block }
end
483
+
484
+ ##
485
+ # @private invokes the utterance callbacks once :END_OF_SINGLE_UTTERANCE
486
+ # is received.
487
def pass_utterance!
  synchronize do
    # Utterance callbacks take no arguments.
    @callbacks[:utterance].each { |callback| callback.call }
  end
end
492
+
493
+ ##
494
+ # Register to be notified of an error received during the stream.
495
+ #
496
+ # @yield [callback] The block for handling an error raised by the stream.
497
+ # @yieldparam [Exception] error The error raised.
498
+ #
499
+ # @example
500
+ # require "google/cloud/speech"
501
+ #
502
+ # speech_client = Google::Cloud::Speech.new version: :v1p1beta1
503
+ # streaming_config = {
504
+ # config: {
505
+ # encoding: :linear16,
506
+ # language_code: "en-US",
507
+ # sample_rate_hertz: 16000
508
+ # }
509
+ # }
510
+ # stream = speech_client.streaming_recognize(streaming_config)
511
+ #
512
+ # # register callback for when an error is returned
513
+ # stream.on_error do |error|
514
+ # puts "The following error occurred while streaming: #{error}"
515
+ # stream.stop
516
+ # end
517
+ #
518
+ # # Stream 5 seconds of audio from the microphone
519
+ # # Actual implementation of microphone input varies by platform
520
+ # 5.times do
521
+ # stream.send MicrophoneInput.read(32000)
522
+ # end
523
+ #
524
+ # stream.stop
525
+ #
526
def on_error &block
  # Append the handler to the :error list; handlers are invoked in
  # registration order by #error!.
  synchronize { @callbacks[:error].push block }
end
531
+
532
+ # @private passes an error from the stream thread to the error callbacks.
533
def error! err
  synchronize do
    # With no handlers registered the error is simply dropped here.
    @callbacks[:error].each do |callback|
      callback.call err
    end
  end
end
538
+
539
+ protected
540
+
541
def background_run
  # Hand the lazily-evaluated request enumerator to the streaming call and
  # walk the responses it yields.
  responses = @streaming_call.call(@request_queue.each_item)
  responses.each do |reply|
    begin
      background_results reply
      background_event_type reply.speech_event_type
      background_error reply.error
    rescue StandardError => failure
      # A failure while handling one response is reported to the error
      # callbacks and does not end the iteration.
      error! failure
    end
  end
rescue StandardError => failure
  # Failures from the call itself or from the response enumeration.
  error! failure
ensure
  # Completion is always signalled, whether the stream ended normally or
  # through an error.
  pass_complete!
  Thread.pass
end
558
+
559
def background_results response
  # Handle the results (StreamingRecognitionResult)
  batch = response.results
  return unless batch && batch.any?

  # Split the batch into final and interim results in a single pass.
  finals, interims = batch.partition { |res| res.is_final }

  # callback for interim results received
  pass_interim! interims if interims.any?

  # callback for the final result, if any; only the first final result of
  # a response is passed on
  final = finals.first
  pass_result! final if final
end
574
+
575
def background_event_type event_type
  # Handle the event_type by raising events; only the end-of-utterance
  # event has callbacks attached.
  # TODO: do we automatically call stop here?
  return unless event_type == :END_OF_SINGLE_UTTERANCE
  pass_utterance!
end
580
+
581
def background_error error
  # A response without an error status needs no handling (returns nil).
  unless error.nil?
    # Loaded lazily: the gRPC error classes are only needed on this path.
    require "grpc/errors"
    # Surface the status carried in the response as an exception; the
    # caller (#background_run) rescues it and forwards it to #error!.
    raise GRPC::BadStatus.new(error.code, error.message)
  end
end
587
+
588
+ # @private
589
class EnumeratorQueue
  extend Forwardable

  # Producers enqueue through #push, which is forwarded to the wrapped
  # Queue.
  def_delegator :@q, :push

  # @private
  # `sentinel` is the object whose appearance on the queue ends iteration;
  # it is compared by identity.
  def initialize sentinel
    @sentinel = sentinel
    @q = Queue.new
  end

  # @private
  # Yields queued items in order, blocking on an empty queue, until the
  # sentinel is popped. An Exception instance found on the queue is raised
  # instead of yielded. Without a block, returns an Enumerator over the
  # same sequence.
  def each_item
    return to_enum(:each_item) unless block_given?
    loop do
      item = @q.pop
      break if @sentinel.equal? item
      raise item if item.is_a? Exception
      yield item
    end
  end
end
610
+ end
611
+ end
612
+ end
613
+ end
614
+ end