google-cloud-speech 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google-cloud-speech.rb +6 -13
- data/lib/google/cloud/speech.rb +102 -24
- data/lib/google/cloud/speech/audio.rb +73 -11
- data/lib/google/cloud/speech/job.rb +10 -15
- data/lib/google/cloud/speech/project.rb +142 -40
- data/lib/google/cloud/speech/result.rb +124 -10
- data/lib/google/cloud/speech/service.rb +33 -14
- data/lib/google/cloud/speech/stream.rb +563 -0
- data/lib/google/cloud/speech/v1beta1.rb +2 -0
- data/lib/google/cloud/speech/v1beta1/speech_api.rb +1 -0
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +8 -7
@@ -35,14 +35,15 @@ module Google
|
|
35
35
|
# recognition is correct. This field is typically provided only for the
|
36
36
|
# top hypothesis. A value of 0.0 is a sentinel value indicating
|
37
37
|
# confidence was not set.
|
38
|
-
# @attr_reader [Array<Result>] alternatives Additional
|
39
|
-
# hypotheses (up to the value specified in
|
38
|
+
# @attr_reader [Array<Result::Alternative>] alternatives Additional
|
39
|
+
# recognition hypotheses (up to the value specified in
|
40
|
+
# `max_alternatives`). The server may return fewer than
|
41
|
+
# `max_alternatives`.
|
40
42
|
#
|
41
43
|
# @example
|
42
|
-
# require "google/cloud"
|
44
|
+
# require "google/cloud/speech"
|
43
45
|
#
|
44
|
-
#
|
45
|
-
# speech = gcloud.speech
|
46
|
+
# speech = Google::Cloud::Speech.new
|
46
47
|
#
|
47
48
|
# audio = speech.audio "path/to/audio.raw",
|
48
49
|
# encoding: :raw, sample_rate: 16000
|
@@ -50,10 +51,7 @@ module Google
|
|
50
51
|
#
|
51
52
|
# result = results.first
|
52
53
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
53
|
-
# result.confidence #=>
|
54
|
-
# alternative = result.alternatives.first
|
55
|
-
# alternative.transcript #=> "how old is the Brooklyn brim"
|
56
|
-
# alternative.confidence #=> 22.39
|
54
|
+
# result.confidence #=> 0.9826789498329163
|
57
55
|
#
|
58
56
|
class Result
|
59
57
|
attr_reader :transcript, :confidence, :alternatives
|
@@ -72,10 +70,126 @@ module Google
|
|
72
70
|
head, *tail = *grpc.alternatives
|
73
71
|
return nil if head.nil?
|
74
72
|
alternatives = tail.map do |alt|
|
75
|
-
new alt.transcript, alt.confidence
|
73
|
+
Alternative.new alt.transcript, alt.confidence
|
76
74
|
end
|
77
75
|
new head.transcript, head.confidence, alternatives
|
78
76
|
end
|
77
|
+
|
78
|
+
##
|
79
|
+
# # Result::Alternative
|
80
|
+
#
|
81
|
+
# A speech recognition result corresponding to a portion of the audio.
|
82
|
+
#
|
83
|
+
# @attr_reader [String] transcript Transcript text representing the
|
84
|
+
# words that the user spoke.
|
85
|
+
# @attr_reader [Float] confidence The confidence estimate between 0.0
|
86
|
+
# and 1.0. A higher number means the system is more confident that the
|
87
|
+
# recognition is correct. This field is typically provided only for
|
88
|
+
# the top hypothesis. A value of 0.0 is a sentinel value indicating
|
89
|
+
# confidence was not set.
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# require "google/cloud/speech"
|
93
|
+
#
|
94
|
+
# speech = Google::Cloud::Speech.new
|
95
|
+
#
|
96
|
+
# audio = speech.audio "path/to/audio.raw",
|
97
|
+
# encoding: :raw, sample_rate: 16000
|
98
|
+
# results = audio.recognize
|
99
|
+
#
|
100
|
+
# result = results.first
|
101
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
102
|
+
# result.confidence #=> 0.9826789498329163
|
103
|
+
# alternative = result.alternatives.first
|
104
|
+
# alternative.transcript #=> "how old is the Brooklyn brim"
|
105
|
+
# alternative.confidence #=> 0.22030000388622284
|
106
|
+
#
|
107
|
+
class Alternative
|
108
|
+
attr_reader :transcript, :confidence
|
109
|
+
|
110
|
+
##
|
111
|
+
# @private Creates a new Result::Alternative instance.
|
112
|
+
def initialize transcript, confidence
|
113
|
+
@transcript = transcript
|
114
|
+
@confidence = confidence
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
##
|
120
|
+
# # InterimResult
|
121
|
+
#
|
122
|
+
# A streaming speech recognition result corresponding to a portion of the
|
123
|
+
# audio that is currently being processed.
|
124
|
+
#
|
125
|
+
# See {Project#stream} and {Stream#on_interim}.
|
126
|
+
#
|
127
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SpeechRecognitionResult
|
128
|
+
# SpeechRecognitionResult
|
129
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.StreamingRecognitionResult
|
130
|
+
# StreamingRecognitionResult
|
131
|
+
#
|
132
|
+
# @attr_reader [String] transcript Transcript text representing the words
|
133
|
+
# that the user spoke.
|
134
|
+
# @attr_reader [Float] confidence The confidence estimate between 0.0 and
|
135
|
+
# 1.0. A higher number means the system is more confident that the
|
136
|
+
# recognition is correct. This field is typically provided only for the
|
137
|
+
# top hypothesis. A value of 0.0 is a sentinel value indicating
|
138
|
+
# confidence was not set.
|
139
|
+
# @attr_reader [Float] stability An estimate of the probability that the
|
140
|
+
# recognizer will not change its guess about this interim result. Values
|
141
|
+
# range from 0.0 (completely unstable) to 1.0 (completely stable). Note
|
142
|
+
# that this is not the same as confidence, which estimates the
|
143
|
+
# probability that a recognition result is correct.
|
144
|
+
# @attr_reader [Array<Result::Alternative>] alternatives Additional
|
145
|
+
# recognition hypotheses (up to the value specified in
|
146
|
+
# `max_alternatives`).
|
147
|
+
#
|
148
|
+
# @example
|
149
|
+
# require "google/cloud/speech"
|
150
|
+
#
|
151
|
+
# speech = Google::Cloud::Speech.new
|
152
|
+
#
|
153
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
154
|
+
#
|
155
|
+
# # register callback for when an interim result is returned
|
156
|
+
# stream.on_interim do |final_results, interim_results|
|
157
|
+
# interim_result = interim_results.first
|
158
|
+
# puts interim_result.transcript # "how old is the Brooklyn Bridge"
|
159
|
+
# puts interim_result.confidence # 0.9826789498329163
|
160
|
+
# puts interim_result.stability # 0.8999
|
161
|
+
# end
|
162
|
+
#
|
163
|
+
# # Stream 5 seconds of audio from the microphone
|
164
|
+
# # Actual implementation of microphone input varies by platform
|
165
|
+
# 5.times do
|
166
|
+
# stream.send MicrophoneInput.read(32000)
|
167
|
+
# end
|
168
|
+
#
|
169
|
+
# stream.stop
|
170
|
+
#
|
171
|
+
class InterimResult
|
172
|
+
attr_reader :transcript, :confidence, :stability, :alternatives
|
173
|
+
|
174
|
+
##
|
175
|
+
# @private Creates a new InterimResult instance.
|
176
|
+
def initialize transcript, confidence, stability, alternatives = []
|
177
|
+
@transcript = transcript
|
178
|
+
@confidence = confidence
|
179
|
+
@stability = stability
|
180
|
+
@alternatives = alternatives
|
181
|
+
end
|
182
|
+
|
183
|
+
##
|
184
|
+
# @private New InterimResult from a StreamingRecognitionResult object.
|
185
|
+
def self.from_grpc grpc
|
186
|
+
head, *tail = *grpc.alternatives
|
187
|
+
return nil if head.nil?
|
188
|
+
alternatives = tail.map do |alt|
|
189
|
+
Result::Alternative.new alt.transcript, alt.confidence
|
190
|
+
end
|
191
|
+
new head.transcript, head.confidence, grpc.stability, alternatives
|
192
|
+
end
|
79
193
|
end
|
80
194
|
end
|
81
195
|
end
|
@@ -39,11 +39,13 @@ module Google
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def channel
|
42
|
+
require "grpc"
|
42
43
|
GRPC::Core::Channel.new host, nil, chan_creds
|
43
44
|
end
|
44
45
|
|
45
46
|
def chan_creds
|
46
47
|
return credentials if insecure?
|
48
|
+
require "grpc"
|
47
49
|
GRPC::Core::ChannelCredentials.new.compose \
|
48
50
|
GRPC::Core::CallCredentials.new credentials.client.updater_proc
|
49
51
|
end
|
@@ -56,19 +58,21 @@ module Google
|
|
56
58
|
channel: channel,
|
57
59
|
timeout: timeout,
|
58
60
|
client_config: client_config,
|
59
|
-
app_name: "
|
61
|
+
app_name: "gcloud-ruby",
|
60
62
|
app_version: Google::Cloud::Speech::VERSION)
|
61
63
|
end
|
62
64
|
attr_accessor :mocked_service
|
63
65
|
|
64
66
|
def ops
|
65
67
|
return mocked_ops if mocked_ops
|
66
|
-
@ops ||=
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
68
|
+
@ops ||= \
|
69
|
+
Google::Longrunning::OperationsApi.new(
|
70
|
+
service_path: host,
|
71
|
+
channel: channel,
|
72
|
+
timeout: timeout,
|
73
|
+
client_config: client_config,
|
74
|
+
app_name: "gcloud-ruby",
|
75
|
+
app_version: Google::Cloud::Speech::VERSION)
|
72
76
|
end
|
73
77
|
attr_accessor :mocked_ops
|
74
78
|
|
@@ -77,16 +81,23 @@ module Google
|
|
77
81
|
end
|
78
82
|
|
79
83
|
def recognize_sync audio, config
|
80
|
-
execute
|
84
|
+
execute do
|
85
|
+
service.sync_recognize config, audio, options: default_options
|
86
|
+
end
|
81
87
|
end
|
82
88
|
|
83
89
|
def recognize_async audio, config
|
84
|
-
execute
|
90
|
+
execute do
|
91
|
+
service.async_recognize config, audio, options: default_options
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def recognize_stream request_enum
|
96
|
+
service.speech_stub.streaming_recognize request_enum
|
85
97
|
end
|
86
98
|
|
87
99
|
def get_op name
|
88
|
-
|
89
|
-
execute { ops.get_operation req }
|
100
|
+
execute { ops.get_operation name }
|
90
101
|
end
|
91
102
|
|
92
103
|
def inspect
|
@@ -95,11 +106,19 @@ module Google
|
|
95
106
|
|
96
107
|
protected
|
97
108
|
|
109
|
+
def default_headers
|
110
|
+
{ "google-cloud-resource-prefix" => "projects/#{@project}" }
|
111
|
+
end
|
112
|
+
|
113
|
+
def default_options
|
114
|
+
Google::Gax::CallOptions.new kwargs: default_headers
|
115
|
+
end
|
116
|
+
|
98
117
|
def execute
|
99
|
-
require "grpc" # Ensure GRPC is loaded before rescuing exception
|
100
118
|
yield
|
101
|
-
rescue
|
102
|
-
|
119
|
+
rescue Google::Gax::GaxError => e
|
120
|
+
# GaxError wraps BadStatus, but exposes it as #cause
|
121
|
+
raise Google::Cloud::Error.from_error(e.cause)
|
103
122
|
end
|
104
123
|
end
|
105
124
|
end
|
@@ -0,0 +1,563 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/speech/v1beta1"
|
17
|
+
require "google/cloud/speech/result"
|
18
|
+
require "monitor"
|
19
|
+
require "forwardable"
|
20
|
+
|
21
|
+
module Google
|
22
|
+
module Cloud
|
23
|
+
module Speech
|
24
|
+
##
|
25
|
+
# # Stream
|
26
|
+
#
|
27
|
+
# A resource that represents the streaming requests and responses.
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# require "google/cloud/speech"
|
31
|
+
#
|
32
|
+
# speech = Google::Cloud::Speech.new
|
33
|
+
#
|
34
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
35
|
+
#
|
36
|
+
# # register callback for when a result is returned
|
37
|
+
# stream.on_result do |results|
|
38
|
+
# result = results.first
|
39
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
40
|
+
# puts result.confidence # 0.9826789498329163
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# # Stream 5 seconds of audio from the microphone
|
44
|
+
# # Actual implementation of microphone input varies by platform
|
45
|
+
# 5.times do
|
46
|
+
# stream.send MicrophoneInput.read(32000)
|
47
|
+
# end
|
48
|
+
#
|
49
|
+
# stream.stop
|
50
|
+
#
|
51
|
+
class Stream
|
52
|
+
include MonitorMixin
|
53
|
+
##
|
54
|
+
# @private Creates a new Speech Stream instance.
|
55
|
+
# This must always be private, since it may change as the implementation
|
56
|
+
# changes over time.
|
57
|
+
def initialize service, streaming_recognize_request
|
58
|
+
@service = service
|
59
|
+
@streaming_recognize_request = streaming_recognize_request
|
60
|
+
@results = []
|
61
|
+
@callbacks = Hash.new { |h, k| h[k] = [] }
|
62
|
+
super() # to init MonitorMixin
|
63
|
+
end
|
64
|
+
|
65
|
+
##
|
66
|
+
# Starts the stream. The stream will be started in the first #send call.
|
67
|
+
def start
|
68
|
+
return if @request_queue
|
69
|
+
@request_queue = EnumeratorQueue.new(self)
|
70
|
+
@request_queue.push @streaming_recognize_request
|
71
|
+
|
72
|
+
Thread.new { background_run }
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# Checks if the stream has been started.
|
77
|
+
#
|
78
|
+
# @return [boolean] `true` when started, `false` otherwise.
|
79
|
+
def started?
|
80
|
+
synchronize do
|
81
|
+
!(!@request_queue)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
##
|
86
|
+
# Sends audio content to the server.
|
87
|
+
#
|
88
|
+
# @param [String] bytes A string of binary audio data to be recognized.
|
89
|
+
# The data should be encoded as `ASCII-8BIT`.
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# require "google/cloud/speech"
|
93
|
+
#
|
94
|
+
# speech = Google::Cloud::Speech.new
|
95
|
+
#
|
96
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
97
|
+
#
|
98
|
+
# # register callback for when a result is returned
|
99
|
+
# stream.on_result do |results|
|
100
|
+
# result = results.first
|
101
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
102
|
+
# puts result.confidence # 0.9826789498329163
|
103
|
+
# end
|
104
|
+
#
|
105
|
+
# # Stream 5 seconds of audio from the microphone
|
106
|
+
# # Actual implementation of microphone input varies by platform
|
107
|
+
# 5.times do
|
108
|
+
# stream.send MicrophoneInput.read(32000)
|
109
|
+
# end
|
110
|
+
#
|
111
|
+
# stream.stop
|
112
|
+
#
|
113
|
+
def send bytes
|
114
|
+
start # lazily call start if the stream wasn't started yet
|
115
|
+
# TODO: do not send if stopped?
|
116
|
+
synchronize do
|
117
|
+
req = V1beta1::StreamingRecognizeRequest.new(
|
118
|
+
audio_content: bytes.encode("ASCII-8BIT"))
|
119
|
+
@request_queue.push req
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
##
|
124
|
+
# Stops the stream. Signals to the server that no more data will be
|
125
|
+
# sent.
|
126
|
+
def stop
|
127
|
+
synchronize do
|
128
|
+
return if @request_queue.nil?
|
129
|
+
@request_queue.push self
|
130
|
+
@stopped = true
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
##
|
135
|
+
# Checks if the stream has been stopped.
|
136
|
+
#
|
137
|
+
# @return [boolean] `true` when stopped, `false` otherwise.
|
138
|
+
def stopped?
|
139
|
+
synchronize do
|
140
|
+
@stopped
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
##
|
145
|
+
# The speech recognition results for the audio.
|
146
|
+
#
|
147
|
+
# @return [Array<Result>] The transcribed text of audio recognized.
|
148
|
+
#
|
149
|
+
# @example
|
150
|
+
# require "google/cloud/speech"
|
151
|
+
#
|
152
|
+
# speech = Google::Cloud::Speech.new
|
153
|
+
#
|
154
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
155
|
+
#
|
156
|
+
# # Stream 5 seconds of audio from the microphone
|
157
|
+
# # Actual implementation of microphone input varies by platform
|
158
|
+
# 5.times do
|
159
|
+
# stream.send MicrophoneInput.read(32000)
|
160
|
+
# end
|
161
|
+
#
|
162
|
+
# stream.stop
|
163
|
+
#
|
164
|
+
# results = stream.results
|
165
|
+
# result = results.first
|
166
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
167
|
+
# puts result.confidence # 0.9826789498329163
|
168
|
+
#
|
169
|
+
def results
|
170
|
+
synchronize do
|
171
|
+
@results
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
##
|
176
|
+
# Register to be notified on the reception of an interim result.
|
177
|
+
#
|
178
|
+
# @yield [callback] The block for accessing final and interim results.
|
179
|
+
# @yieldparam [Array<Result>] final_results The final results.
|
180
|
+
# @yieldparam [Array<InterimResult>] interim_results The interim results.
|
181
|
+
#
|
182
|
+
# @example
|
183
|
+
# require "google/cloud/speech"
|
184
|
+
#
|
185
|
+
# speech = Google::Cloud::Speech.new
|
186
|
+
#
|
187
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
188
|
+
#
|
189
|
+
# # register callback for when an interim result is returned
|
190
|
+
# stream.on_interim do |final_results, interim_results|
|
191
|
+
# interim_result = interim_results.first
|
192
|
+
# puts interim_result.transcript # "how old is the Brooklyn Bridge"
|
193
|
+
# puts interim_result.confidence # 0.9826789498329163
|
194
|
+
# puts interim_result.stability # 0.8999
|
195
|
+
# end
|
196
|
+
#
|
197
|
+
# # Stream 5 seconds of audio from the microphone
|
198
|
+
# # Actual implementation of microphone input varies by platform
|
199
|
+
# 5.times do
|
200
|
+
# stream.send MicrophoneInput.read(32000)
|
201
|
+
# end
|
202
|
+
#
|
203
|
+
# stream.stop
|
204
|
+
#
|
205
|
+
def on_interim &block
|
206
|
+
synchronize do
|
207
|
+
@callbacks[:interim] << block
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
# @private yields two arguments, all final results and the
|
212
|
+
# non-final/incomplete result
|
213
|
+
def interim! interim_results
|
214
|
+
synchronize do
|
215
|
+
@callbacks[:interim].each { |c| c.call results, interim_results }
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
##
|
220
|
+
# Register to be notified on the reception of a final result.
|
221
|
+
#
|
222
|
+
# @yield [callback] The block for accessing final results.
|
223
|
+
# @yieldparam [Array<Result>] results The final results.
|
224
|
+
#
|
225
|
+
# @example
|
226
|
+
# require "google/cloud/speech"
|
227
|
+
#
|
228
|
+
# speech = Google::Cloud::Speech.new
|
229
|
+
#
|
230
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
231
|
+
#
|
232
|
+
# # register callback for when a result is returned
|
233
|
+
# stream.on_result do |results|
|
234
|
+
# result = results.first
|
235
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
236
|
+
# puts result.confidence # 0.9826789498329163
|
237
|
+
# end
|
238
|
+
#
|
239
|
+
# # Stream 5 seconds of audio from the microphone
|
240
|
+
# # Actual implementation of microphone input varies by platform
|
241
|
+
# 5.times do
|
242
|
+
# stream.send MicrophoneInput.read(32000)
|
243
|
+
# end
|
244
|
+
#
|
245
|
+
# stream.stop
|
246
|
+
#
|
247
|
+
def on_result &block
|
248
|
+
synchronize do
|
249
|
+
@callbacks[:result] << block
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
# @private add a result object, and call the callbacks
|
254
|
+
def add_result!result_index, result_grpc
|
255
|
+
synchronize do
|
256
|
+
@results[result_index] = Result.from_grpc result_grpc
|
257
|
+
end
|
258
|
+
# callback for final result received
|
259
|
+
result!
|
260
|
+
end
|
261
|
+
|
262
|
+
# @private yields the final results as they are received
|
263
|
+
def result!
|
264
|
+
synchronize do
|
265
|
+
@callbacks[:result].each { |c| c.call results }
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
##
|
270
|
+
# Register to be notified when speech has been detected in the audio
|
271
|
+
# stream.
|
272
|
+
#
|
273
|
+
# @yield [callback] The block to be called when speech has been detected
|
274
|
+
# in the audio stream.
|
275
|
+
#
|
276
|
+
# @example
|
277
|
+
# require "google/cloud/speech"
|
278
|
+
#
|
279
|
+
# speech = Google::Cloud::Speech.new
|
280
|
+
#
|
281
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
282
|
+
#
|
283
|
+
# # register callback for when speech has started.
|
284
|
+
# stream.on_speech_start do
|
285
|
+
# puts "Speech has started."
|
286
|
+
# end
|
287
|
+
#
|
288
|
+
# # Stream 5 seconds of audio from the microphone
|
289
|
+
# # Actual implementation of microphone input varies by platform
|
290
|
+
# 5.times do
|
291
|
+
# stream.send MicrophoneInput.read(32000)
|
292
|
+
# end
|
293
|
+
#
|
294
|
+
# stream.stop
|
295
|
+
#
|
296
|
+
def on_speech_start &block
|
297
|
+
synchronize do
|
298
|
+
@callbacks[:speech_start] << block
|
299
|
+
end
|
300
|
+
end
|
301
|
+
|
302
|
+
# @private calls the registered speech_start callbacks once
|
303
|
+
# :START_OF_SPEECH is received.
|
304
|
+
def speech_start!
|
305
|
+
synchronize do
|
306
|
+
@callbacks[:speech_start].each(&:call)
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
##
|
311
|
+
# Register to be notified when speech has ceased to be detected in the
|
312
|
+
# audio stream.
|
313
|
+
#
|
314
|
+
# @yield [callback] The block to be called when speech has ceased to be
|
315
|
+
# detected in the audio stream.
|
316
|
+
#
|
317
|
+
# @example
|
318
|
+
# require "google/cloud/speech"
|
319
|
+
#
|
320
|
+
# speech = Google::Cloud::Speech.new
|
321
|
+
#
|
322
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
323
|
+
#
|
324
|
+
# # register callback for when speech has ended.
|
325
|
+
# stream.on_speech_end do
|
326
|
+
# puts "Speech has ended."
|
327
|
+
# end
|
328
|
+
#
|
329
|
+
# # Stream 5 seconds of audio from the microphone
|
330
|
+
# # Actual implementation of microphone input varies by platform
|
331
|
+
# 5.times do
|
332
|
+
# stream.send MicrophoneInput.read(32000)
|
333
|
+
# end
|
334
|
+
#
|
335
|
+
# stream.stop
|
336
|
+
#
|
337
|
+
def on_speech_end &block
|
338
|
+
synchronize do
|
339
|
+
@callbacks[:speech_end] << block
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
# @private calls the registered speech_end callbacks once
|
344
|
+
# :END_OF_SPEECH is received.
|
345
|
+
def speech_end!
|
346
|
+
synchronize do
|
347
|
+
@callbacks[:speech_end].each(&:call)
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
##
|
352
|
+
# Register to be notified when the end of the audio stream has been
|
353
|
+
# reached.
|
354
|
+
#
|
355
|
+
# @yield [callback] The block to be called when the end of the audio
|
356
|
+
# stream has been reached.
|
357
|
+
#
|
358
|
+
# @example
|
359
|
+
# require "google/cloud/speech"
|
360
|
+
#
|
361
|
+
# speech = Google::Cloud::Speech.new
|
362
|
+
#
|
363
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
364
|
+
#
|
365
|
+
# # register callback for when audio has ended.
|
366
|
+
# stream.on_complete do
|
367
|
+
# puts "Audio has ended."
|
368
|
+
# end
|
369
|
+
#
|
370
|
+
# # Stream 5 seconds of audio from the microphone
|
371
|
+
# # Actual implementation of microphone input varies by platform
|
372
|
+
# 5.times do
|
373
|
+
# stream.send MicrophoneInput.read(32000)
|
374
|
+
# end
|
375
|
+
#
|
376
|
+
# stream.stop
|
377
|
+
#
|
378
|
+
def on_complete &block
|
379
|
+
synchronize do
|
380
|
+
@callbacks[:complete] << block
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
# @private yields all final results once the recognition is completed
|
385
|
+
# depending on how the Stream is configured, this can be on the
|
386
|
+
# reception of :END_OF_AUDIO or :END_OF_UTTERANCE.
|
387
|
+
def complete!
|
388
|
+
synchronize do
|
389
|
+
@callbacks[:complete].each(&:call)
|
390
|
+
end
|
391
|
+
end
|
392
|
+
|
393
|
+
##
|
394
|
+
# Register to be notified when the server has detected the end of the
|
395
|
+
# user's speech utterance and expects no additional speech. Therefore,
|
396
|
+
# the server will not process additional audio. The client should stop
|
397
|
+
# sending additional audio data. This event only occurs when `utterance`
|
398
|
+
# is `true`.
|
399
|
+
#
|
400
|
+
# @yield [callback] The block to be called when the end of the user's
|
401
|
+
# speech utterance has been detected.
|
402
|
+
#
|
403
|
+
# @example
|
404
|
+
# require "google/cloud/speech"
|
405
|
+
#
|
406
|
+
# speech = Google::Cloud::Speech.new
|
407
|
+
#
|
408
|
+
# stream = speech.stream encoding: :raw,
|
409
|
+
# sample_rate: 16000,
|
410
|
+
# utterance: true
|
411
|
+
#
|
412
|
+
# # register callback for when utterance has occurred.
|
413
|
+
# stream.on_utterance do
|
414
|
+
# puts "Utterance has occurred."
|
415
|
+
# stream.stop
|
416
|
+
# end
|
417
|
+
#
|
418
|
+
# # Stream 5 seconds of audio from the microphone
|
419
|
+
# # Actual implementation of microphone input varies by platform
|
420
|
+
# 5.times do
|
421
|
+
# stream.send MicrophoneInput.read(32000)
|
422
|
+
# end
|
423
|
+
#
|
424
|
+
# stream.stop unless stream.stopped?
|
425
|
+
#
|
426
|
+
def on_utterance &block
|
427
|
+
synchronize do
|
428
|
+
@callbacks[:utterance] << block
|
429
|
+
end
|
430
|
+
end
|
431
|
+
|
432
|
+
# @private calls the registered utterance callbacks once
|
433
|
+
# :END_OF_UTTERANCE is received.
|
434
|
+
def utterance!
|
435
|
+
synchronize do
|
436
|
+
@callbacks[:utterance].each(&:call)
|
437
|
+
end
|
438
|
+
end
|
439
|
+
|
440
|
+
##
|
441
|
+
# Register to be notified of an error received during the stream.
|
442
|
+
#
|
443
|
+
# @yield [callback] The block to be called when an error is raised.
|
444
|
+
# @yieldparam [Exception] error The error raised.
|
445
|
+
#
|
446
|
+
# @example
|
447
|
+
# require "google/cloud/speech"
|
448
|
+
#
|
449
|
+
# speech = Google::Cloud::Speech.new
|
450
|
+
#
|
451
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
452
|
+
#
|
453
|
+
# # register callback for when an error is returned
|
454
|
+
# stream.on_error do |error|
|
455
|
+
# puts "The following error occurred while streaming: #{error}"
|
456
|
+
# stream.stop
|
457
|
+
# end
|
458
|
+
#
|
459
|
+
# # Stream 5 seconds of audio from the microphone
|
460
|
+
# # Actual implementation of microphone input varies by platform
|
461
|
+
# 5.times do
|
462
|
+
# stream.send MicrophoneInput.read(32000)
|
463
|
+
# end
|
464
|
+
#
|
465
|
+
# stream.stop
|
466
|
+
#
|
467
|
+
def on_error &block
|
468
|
+
synchronize do
|
469
|
+
@callbacks[:error] << block
|
470
|
+
end
|
471
|
+
end
|
472
|
+
|
473
|
+
# @private returns error object from the stream thread.
|
474
|
+
def error! err
|
475
|
+
synchronize do
|
476
|
+
@callbacks[:error].each { |c| c.call err }
|
477
|
+
end
|
478
|
+
end
|
479
|
+
|
480
|
+
protected
|
481
|
+
|
482
|
+
def background_run
|
483
|
+
response_enum = @service.recognize_stream @request_queue.each_item
|
484
|
+
response_enum.each do |response|
|
485
|
+
begin
|
486
|
+
background_results response
|
487
|
+
background_endpointer response.endpointer_type
|
488
|
+
background_error response.error
|
489
|
+
rescue => e
|
490
|
+
error! Google::Cloud::Error.from_error(e)
|
491
|
+
end
|
492
|
+
end
|
493
|
+
Thread.pass
|
494
|
+
end
|
495
|
+
|
496
|
+
def background_results response
|
497
|
+
# Handle the results (StreamingRecognitionResult)
|
498
|
+
return unless response.results && response.results.any?
|
499
|
+
|
500
|
+
final_grpc, interim_grpcs = *response.results
|
501
|
+
if final_grpc && final_grpc.is_final
|
502
|
+
add_result! response.result_index, final_grpc
|
503
|
+
else
|
504
|
+
# all results are interim
|
505
|
+
interim_grpcs = response.results
|
506
|
+
end
|
507
|
+
|
508
|
+
# convert to Speech object from GRPC object
|
509
|
+
interim_results = Array(interim_grpcs).map do |grpc|
|
510
|
+
InterimResult.from_grpc grpc
|
511
|
+
end
|
512
|
+
# callback for interim results received
|
513
|
+
interim! interim_results if interim_results.any?
|
514
|
+
end
|
515
|
+
|
516
|
+
def background_endpointer endpointer
|
517
|
+
# Handle the endpointer by raising events
|
518
|
+
if endpointer == :START_OF_SPEECH
|
519
|
+
speech_start!
|
520
|
+
elsif endpointer == :END_OF_SPEECH
|
521
|
+
speech_end!
|
522
|
+
elsif endpointer == :END_OF_AUDIO
|
523
|
+
# TODO: do we automatically call stop here?
|
524
|
+
complete!
|
525
|
+
elsif endpointer == :END_OF_UTTERANCE
|
526
|
+
# TODO: do we automatically call stop here?
|
527
|
+
utterance!
|
528
|
+
end
|
529
|
+
end
|
530
|
+
|
531
|
+
def background_error error
|
532
|
+
return if error.nil?
|
533
|
+
|
534
|
+
require "grpc/errors"
|
535
|
+
fail GRPC::BadStatus.new(error.code, error.message)
|
536
|
+
end
|
537
|
+
|
538
|
+
# @private
|
539
|
+
class EnumeratorQueue
|
540
|
+
extend Forwardable
|
541
|
+
def_delegators :@q, :push
|
542
|
+
|
543
|
+
# @private
|
544
|
+
def initialize sentinel
|
545
|
+
@q = Queue.new
|
546
|
+
@sentinel = sentinel
|
547
|
+
end
|
548
|
+
|
549
|
+
# @private
|
550
|
+
def each_item
|
551
|
+
return enum_for(:each_item) unless block_given?
|
552
|
+
loop do
|
553
|
+
r = @q.pop
|
554
|
+
break if r.equal? @sentinel
|
555
|
+
fail r if r.is_a? Exception
|
556
|
+
yield r
|
557
|
+
end
|
558
|
+
end
|
559
|
+
end
|
560
|
+
end
|
561
|
+
end
|
562
|
+
end
|
563
|
+
end
|