google-cloud-speech 0.20.0 → 0.21.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google-cloud-speech.rb +6 -13
- data/lib/google/cloud/speech.rb +102 -24
- data/lib/google/cloud/speech/audio.rb +73 -11
- data/lib/google/cloud/speech/job.rb +10 -15
- data/lib/google/cloud/speech/project.rb +142 -40
- data/lib/google/cloud/speech/result.rb +124 -10
- data/lib/google/cloud/speech/service.rb +33 -14
- data/lib/google/cloud/speech/stream.rb +563 -0
- data/lib/google/cloud/speech/v1beta1.rb +2 -0
- data/lib/google/cloud/speech/v1beta1/speech_api.rb +1 -0
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +8 -7
@@ -35,14 +35,15 @@ module Google
|
|
35
35
|
# recognition is correct. This field is typically provided only for the
|
36
36
|
# top hypothesis. A value of 0.0 is a sentinel value indicating
|
37
37
|
# confidence was not set.
|
38
|
-
# @attr_reader [Array<Result>] alternatives Additional
|
39
|
-
# hypotheses (up to the value specified in
|
38
|
+
# @attr_reader [Array<Result::Alternative>] alternatives Additional
|
39
|
+
# recognition hypotheses (up to the value specified in
|
40
|
+
# `max_alternatives`). The server may return fewer than
|
41
|
+
# `max_alternatives`.
|
40
42
|
#
|
41
43
|
# @example
|
42
|
-
# require "google/cloud"
|
44
|
+
# require "google/cloud/speech"
|
43
45
|
#
|
44
|
-
#
|
45
|
-
# speech = gcloud.speech
|
46
|
+
# speech = Google::Cloud::Speech.new
|
46
47
|
#
|
47
48
|
# audio = speech.audio "path/to/audio.raw",
|
48
49
|
# encoding: :raw, sample_rate: 16000
|
@@ -50,10 +51,7 @@ module Google
|
|
50
51
|
#
|
51
52
|
# result = results.first
|
52
53
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
53
|
-
# result.confidence #=>
|
54
|
-
# alternative = result.alternatives.first
|
55
|
-
# alternative.transcript #=> "how old is the Brooklyn brim"
|
56
|
-
# alternative.confidence #=> 22.39
|
54
|
+
# result.confidence #=> 0.9826789498329163
|
57
55
|
#
|
58
56
|
class Result
|
59
57
|
attr_reader :transcript, :confidence, :alternatives
|
@@ -72,10 +70,126 @@ module Google
|
|
72
70
|
head, *tail = *grpc.alternatives
|
73
71
|
return nil if head.nil?
|
74
72
|
alternatives = tail.map do |alt|
|
75
|
-
new alt.transcript, alt.confidence
|
73
|
+
Alternative.new alt.transcript, alt.confidence
|
76
74
|
end
|
77
75
|
new head.transcript, head.confidence, alternatives
|
78
76
|
end
|
77
|
+
|
78
|
+
##
|
79
|
+
# # Result::Alternative
|
80
|
+
#
|
81
|
+
# A speech recognition result corresponding to a portion of the audio.
|
82
|
+
#
|
83
|
+
# @attr_reader [String] transcript Transcript text representing the
|
84
|
+
# words that the user spoke.
|
85
|
+
# @attr_reader [Float] confidence The confidence estimate between 0.0
|
86
|
+
# and 1.0. A higher number means the system is more confident that the
|
87
|
+
# recognition is correct. This field is typically provided only for
|
88
|
+
# the top hypothesis. A value of 0.0 is a sentinel value indicating
|
89
|
+
# confidence was not set.
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# require "google/cloud/speech"
|
93
|
+
#
|
94
|
+
# speech = Google::Cloud::Speech.new
|
95
|
+
#
|
96
|
+
# audio = speech.audio "path/to/audio.raw",
|
97
|
+
# encoding: :raw, sample_rate: 16000
|
98
|
+
# results = audio.recognize
|
99
|
+
#
|
100
|
+
# result = results.first
|
101
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
102
|
+
# result.confidence #=> 0.9826789498329163
|
103
|
+
# alternative = result.alternatives.first
|
104
|
+
# alternative.transcript #=> "how old is the Brooklyn brim"
|
105
|
+
# alternative.confidence #=> 0.22030000388622284
|
106
|
+
#
|
107
|
+
class Alternative
|
108
|
+
attr_reader :transcript, :confidence
|
109
|
+
|
110
|
+
##
|
111
|
+
# @private Creates a new Result::Alternative instance.
|
112
|
+
def initialize transcript, confidence
|
113
|
+
@transcript = transcript
|
114
|
+
@confidence = confidence
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
##
|
120
|
+
# # InterimResult
|
121
|
+
#
|
122
|
+
# A streaming speech recognition result corresponding to a portion of the
|
123
|
+
# audio that is currently being processed.
|
124
|
+
#
|
125
|
+
# See {Project#stream} and {Stream#on_interim}.
|
126
|
+
#
|
127
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SpeechRecognitionResult
|
128
|
+
# SpeechRecognitionResult
|
129
|
+
# @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.StreamingRecognitionResult
|
130
|
+
# StreamingRecognitionResult
|
131
|
+
#
|
132
|
+
# @attr_reader [String] transcript Transcript text representing the words
|
133
|
+
# that the user spoke.
|
134
|
+
# @attr_reader [Float] confidence The confidence estimate between 0.0 and
|
135
|
+
# 1.0. A higher number means the system is more confident that the
|
136
|
+
# recognition is correct. This field is typically provided only for the
|
137
|
+
# top hypothesis. A value of 0.0 is a sentinel value indicating
|
138
|
+
# confidence was not set.
|
139
|
+
# @attr_reader [Float] stability An estimate of the probability that the
|
140
|
+
# recognizer will not change its guess about this interim result. Values
|
141
|
+
# range from 0.0 (completely unstable) to 1.0 (completely stable). Note
|
142
|
+
# that this is not the same as confidence, which estimates the
|
143
|
+
# probability that a recognition result is correct.
|
144
|
+
# @attr_reader [Array<Result::Alternative>] alternatives Additional
|
145
|
+
# recognition hypotheses (up to the value specified in
|
146
|
+
# `max_alternatives`).
|
147
|
+
#
|
148
|
+
# @example
|
149
|
+
# require "google/cloud/speech"
|
150
|
+
#
|
151
|
+
# speech = Google::Cloud::Speech.new
|
152
|
+
#
|
153
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
154
|
+
#
|
155
|
+
# # register callback for when an interim result is returned
|
156
|
+
# stream.on_interim do |final_results, interim_results|
|
157
|
+
# interim_result = interim_results.first
|
158
|
+
# puts interim_result.transcript # "how old is the Brooklyn Bridge"
|
159
|
+
# puts interim_result.confidence # 0.9826789498329163
|
160
|
+
# puts interim_result.stability # 0.8999
|
161
|
+
# end
|
162
|
+
#
|
163
|
+
# # Stream 5 seconds of audio from the microphone
|
164
|
+
# # Actual implementation of microphone input varies by platform
|
165
|
+
# 5.times do
|
166
|
+
# stream.send MicrophoneInput.read(32000)
|
167
|
+
# end
|
168
|
+
#
|
169
|
+
# stream.stop
|
170
|
+
#
|
171
|
+
class InterimResult
|
172
|
+
attr_reader :transcript, :confidence, :stability, :alternatives
|
173
|
+
|
174
|
+
##
|
175
|
+
# @private Creates a new InterimResult instance.
|
176
|
+
def initialize transcript, confidence, stability, alternatives = []
|
177
|
+
@transcript = transcript
|
178
|
+
@confidence = confidence
|
179
|
+
@stability = stability
|
180
|
+
@alternatives = alternatives
|
181
|
+
end
|
182
|
+
|
183
|
+
##
|
184
|
+
# @private New InterimResult from a StreamingRecognitionResult object.
|
185
|
+
def self.from_grpc grpc
|
186
|
+
head, *tail = *grpc.alternatives
|
187
|
+
return nil if head.nil?
|
188
|
+
alternatives = tail.map do |alt|
|
189
|
+
Result::Alternative.new alt.transcript, alt.confidence
|
190
|
+
end
|
191
|
+
new head.transcript, head.confidence, grpc.stability, alternatives
|
192
|
+
end
|
79
193
|
end
|
80
194
|
end
|
81
195
|
end
|
@@ -39,11 +39,13 @@ module Google
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def channel
|
42
|
+
require "grpc"
|
42
43
|
GRPC::Core::Channel.new host, nil, chan_creds
|
43
44
|
end
|
44
45
|
|
45
46
|
def chan_creds
|
46
47
|
return credentials if insecure?
|
48
|
+
require "grpc"
|
47
49
|
GRPC::Core::ChannelCredentials.new.compose \
|
48
50
|
GRPC::Core::CallCredentials.new credentials.client.updater_proc
|
49
51
|
end
|
@@ -56,19 +58,21 @@ module Google
|
|
56
58
|
channel: channel,
|
57
59
|
timeout: timeout,
|
58
60
|
client_config: client_config,
|
59
|
-
app_name: "
|
61
|
+
app_name: "gcloud-ruby",
|
60
62
|
app_version: Google::Cloud::Speech::VERSION)
|
61
63
|
end
|
62
64
|
attr_accessor :mocked_service
|
63
65
|
|
64
66
|
def ops
|
65
67
|
return mocked_ops if mocked_ops
|
66
|
-
@ops ||=
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
68
|
+
@ops ||= \
|
69
|
+
Google::Longrunning::OperationsApi.new(
|
70
|
+
service_path: host,
|
71
|
+
channel: channel,
|
72
|
+
timeout: timeout,
|
73
|
+
client_config: client_config,
|
74
|
+
app_name: "gcloud-ruby",
|
75
|
+
app_version: Google::Cloud::Speech::VERSION)
|
72
76
|
end
|
73
77
|
attr_accessor :mocked_ops
|
74
78
|
|
@@ -77,16 +81,23 @@ module Google
|
|
77
81
|
end
|
78
82
|
|
79
83
|
def recognize_sync audio, config
|
80
|
-
execute
|
84
|
+
execute do
|
85
|
+
service.sync_recognize config, audio, options: default_options
|
86
|
+
end
|
81
87
|
end
|
82
88
|
|
83
89
|
def recognize_async audio, config
|
84
|
-
execute
|
90
|
+
execute do
|
91
|
+
service.async_recognize config, audio, options: default_options
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def recognize_stream request_enum
|
96
|
+
service.speech_stub.streaming_recognize request_enum
|
85
97
|
end
|
86
98
|
|
87
99
|
def get_op name
|
88
|
-
|
89
|
-
execute { ops.get_operation req }
|
100
|
+
execute { ops.get_operation name }
|
90
101
|
end
|
91
102
|
|
92
103
|
def inspect
|
@@ -95,11 +106,19 @@ module Google
|
|
95
106
|
|
96
107
|
protected
|
97
108
|
|
109
|
+
def default_headers
|
110
|
+
{ "google-cloud-resource-prefix" => "projects/#{@project}" }
|
111
|
+
end
|
112
|
+
|
113
|
+
def default_options
|
114
|
+
Google::Gax::CallOptions.new kwargs: default_headers
|
115
|
+
end
|
116
|
+
|
98
117
|
def execute
|
99
|
-
require "grpc" # Ensure GRPC is loaded before rescuing exception
|
100
118
|
yield
|
101
|
-
rescue
|
102
|
-
|
119
|
+
rescue Google::Gax::GaxError => e
|
120
|
+
# GaxError wraps BadStatus, but exposes it as #cause
|
121
|
+
raise Google::Cloud::Error.from_error(e.cause)
|
103
122
|
end
|
104
123
|
end
|
105
124
|
end
|
@@ -0,0 +1,563 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/speech/v1beta1"
|
17
|
+
require "google/cloud/speech/result"
|
18
|
+
require "monitor"
|
19
|
+
require "forwardable"
|
20
|
+
|
21
|
+
module Google
|
22
|
+
module Cloud
|
23
|
+
module Speech
|
24
|
+
##
|
25
|
+
# # Stream
|
26
|
+
#
|
27
|
+
# A resource that represents the streaming requests and responses.
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# require "google/cloud/speech"
|
31
|
+
#
|
32
|
+
# speech = Google::Cloud::Speech.new
|
33
|
+
#
|
34
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
35
|
+
#
|
36
|
+
# # register callback for when a result is returned
|
37
|
+
# stream.on_result do |results|
|
38
|
+
# result = results.first
|
39
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
40
|
+
# puts result.confidence # 0.9826789498329163
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# # Stream 5 seconds of audio from the microphone
|
44
|
+
# # Actual implementation of microphone input varies by platform
|
45
|
+
# 5.times do
|
46
|
+
# stream.send MicrophoneInput.read(32000)
|
47
|
+
# end
|
48
|
+
#
|
49
|
+
# stream.stop
|
50
|
+
#
|
51
|
+
class Stream
|
52
|
+
include MonitorMixin
|
53
|
+
##
|
54
|
+
# @private Creates a new Speech Stream instance.
|
55
|
+
# This must always be private, since it may change as the implementation
|
56
|
+
# changes over time.
|
57
|
+
def initialize service, streaming_recognize_request
|
58
|
+
@service = service
|
59
|
+
@streaming_recognize_request = streaming_recognize_request
|
60
|
+
@results = []
|
61
|
+
@callbacks = Hash.new { |h, k| h[k] = [] }
|
62
|
+
super() # to init MonitorMixin
|
63
|
+
end
|
64
|
+
|
65
|
+
##
|
66
|
+
# Starts the stream. The stream will be started in the first #send call.
|
67
|
+
def start
|
68
|
+
return if @request_queue
|
69
|
+
@request_queue = EnumeratorQueue.new(self)
|
70
|
+
@request_queue.push @streaming_recognize_request
|
71
|
+
|
72
|
+
Thread.new { background_run }
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# Checks if the stream has been started.
|
77
|
+
#
|
78
|
+
# @return [boolean] `true` when started, `false` otherwise.
|
79
|
+
def started?
|
80
|
+
synchronize do
|
81
|
+
!(!@request_queue)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
##
|
86
|
+
# Sends audio content to the server.
|
87
|
+
#
|
88
|
+
# @param [String] bytes A string of binary audio data to be recognized.
|
89
|
+
# The data should be encoded as `ASCII-8BIT`.
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# require "google/cloud/speech"
|
93
|
+
#
|
94
|
+
# speech = Google::Cloud::Speech.new
|
95
|
+
#
|
96
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
97
|
+
#
|
98
|
+
# # register callback for when a result is returned
|
99
|
+
# stream.on_result do |results|
|
100
|
+
# result = results.first
|
101
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
102
|
+
# puts result.confidence # 0.9826789498329163
|
103
|
+
# end
|
104
|
+
#
|
105
|
+
# # Stream 5 seconds of audio from the microphone
|
106
|
+
# # Actual implementation of microphone input varies by platform
|
107
|
+
# 5.times do
|
108
|
+
# stream.send MicrophoneInput.read(32000)
|
109
|
+
# end
|
110
|
+
#
|
111
|
+
# stream.stop
|
112
|
+
#
|
113
|
+
def send bytes
|
114
|
+
start # lazily call start if the stream wasn't started yet
|
115
|
+
# TODO: do not send if stopped?
|
116
|
+
synchronize do
|
117
|
+
req = V1beta1::StreamingRecognizeRequest.new(
|
118
|
+
audio_content: bytes.encode("ASCII-8BIT"))
|
119
|
+
@request_queue.push req
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
##
|
124
|
+
# Stops the stream. Signals to the server that no more data will be
|
125
|
+
# sent.
|
126
|
+
def stop
|
127
|
+
synchronize do
|
128
|
+
return if @request_queue.nil?
|
129
|
+
@request_queue.push self
|
130
|
+
@stopped = true
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
##
|
135
|
+
# Checks if the stream has been stopped.
|
136
|
+
#
|
137
|
+
# @return [boolean] `true` when stopped, `false` otherwise.
|
138
|
+
def stopped?
|
139
|
+
synchronize do
|
140
|
+
@stopped
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
##
|
145
|
+
# The speech recognition results for the audio.
|
146
|
+
#
|
147
|
+
# @return [Array<Result>] The transcribed text of audio recognized.
|
148
|
+
#
|
149
|
+
# @example
|
150
|
+
# require "google/cloud/speech"
|
151
|
+
#
|
152
|
+
# speech = Google::Cloud::Speech.new
|
153
|
+
#
|
154
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
155
|
+
#
|
156
|
+
# # Stream 5 seconds of audio from the microphone
|
157
|
+
# # Actual implementation of microphone input varies by platform
|
158
|
+
# 5.times do
|
159
|
+
# stream.send MicrophoneInput.read(32000)
|
160
|
+
# end
|
161
|
+
#
|
162
|
+
# stream.stop
|
163
|
+
#
|
164
|
+
# results = stream.results
|
165
|
+
# result = results.first
|
166
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
167
|
+
# puts result.confidence # 0.9826789498329163
|
168
|
+
#
|
169
|
+
def results
|
170
|
+
synchronize do
|
171
|
+
@results
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
##
|
176
|
+
# Register to be notified on the reception of an interim result.
|
177
|
+
#
|
178
|
+
# @yield [callback] The block for accessing final and interim results.
|
179
|
+
# @yieldparam [Array<Result>] final_results The final results.
|
180
|
+
# @yieldparam [Array<InterimResult>] interim_results The interim results.
|
181
|
+
#
|
182
|
+
# @example
|
183
|
+
# require "google/cloud/speech"
|
184
|
+
#
|
185
|
+
# speech = Google::Cloud::Speech.new
|
186
|
+
#
|
187
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
188
|
+
#
|
189
|
+
# # register callback for when an interim result is returned
|
190
|
+
# stream.on_interim do |final_results, interim_results|
|
191
|
+
# interim_result = interim_results.first
|
192
|
+
# puts interim_result.transcript # "how old is the Brooklyn Bridge"
|
193
|
+
# puts interim_result.confidence # 0.9826789498329163
|
194
|
+
# puts interim_result.stability # 0.8999
|
195
|
+
# end
|
196
|
+
#
|
197
|
+
# # Stream 5 seconds of audio from the microphone
|
198
|
+
# # Actual implementation of microphone input varies by platform
|
199
|
+
# 5.times do
|
200
|
+
# stream.send MicrophoneInput.read(32000)
|
201
|
+
# end
|
202
|
+
#
|
203
|
+
# stream.stop
|
204
|
+
#
|
205
|
+
def on_interim &block
|
206
|
+
synchronize do
|
207
|
+
@callbacks[:interim] << block
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
# @private yields two arguments, all final results and the
|
212
|
+
# non-final/incomplete result
|
213
|
+
def interim! interim_results
|
214
|
+
synchronize do
|
215
|
+
@callbacks[:interim].each { |c| c.call results, interim_results }
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
##
|
220
|
+
# Register to be notified on the reception of a final result.
|
221
|
+
#
|
222
|
+
# @yield [callback] The block for accessing final results.
|
223
|
+
# @yieldparam [Array<Result>] results The final results.
|
224
|
+
#
|
225
|
+
# @example
|
226
|
+
# require "google/cloud/speech"
|
227
|
+
#
|
228
|
+
# speech = Google::Cloud::Speech.new
|
229
|
+
#
|
230
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
231
|
+
#
|
232
|
+
# # register callback for when a result is returned
|
233
|
+
# stream.on_result do |results|
|
234
|
+
# result = results.first
|
235
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
236
|
+
# puts result.confidence # 0.9826789498329163
|
237
|
+
# end
|
238
|
+
#
|
239
|
+
# # Stream 5 seconds of audio from the microphone
|
240
|
+
# # Actual implementation of microphone input varies by platform
|
241
|
+
# 5.times do
|
242
|
+
# stream.send MicrophoneInput.read(32000)
|
243
|
+
# end
|
244
|
+
#
|
245
|
+
# stream.stop
|
246
|
+
#
|
247
|
+
def on_result &block
|
248
|
+
synchronize do
|
249
|
+
@callbacks[:result] << block
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
# @private add a result object, and call the callbacks
|
254
|
+
def add_result!result_index, result_grpc
|
255
|
+
synchronize do
|
256
|
+
@results[result_index] = Result.from_grpc result_grpc
|
257
|
+
end
|
258
|
+
# callback for final result received
|
259
|
+
result!
|
260
|
+
end
|
261
|
+
|
262
|
+
# @private yields the final results as they are received
|
263
|
+
def result!
|
264
|
+
synchronize do
|
265
|
+
@callbacks[:result].each { |c| c.call results }
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
##
|
270
|
+
# Register to be notified when speech has been detected in the audio
|
271
|
+
# stream.
|
272
|
+
#
|
273
|
+
# @yield [callback] The block to be called when speech has been detected
|
274
|
+
# in the audio stream.
|
275
|
+
#
|
276
|
+
# @example
|
277
|
+
# require "google/cloud/speech"
|
278
|
+
#
|
279
|
+
# speech = Google::Cloud::Speech.new
|
280
|
+
#
|
281
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
282
|
+
#
|
283
|
+
# # register callback for when speech has started.
|
284
|
+
# stream.on_speech_start do
|
285
|
+
# puts "Speech has started."
|
286
|
+
# end
|
287
|
+
#
|
288
|
+
# # Stream 5 seconds of audio from the microphone
|
289
|
+
# # Actual implementation of microphone input varies by platform
|
290
|
+
# 5.times do
|
291
|
+
# stream.send MicrophoneInput.read(32000)
|
292
|
+
# end
|
293
|
+
#
|
294
|
+
# stream.stop
|
295
|
+
#
|
296
|
+
def on_speech_start &block
|
297
|
+
synchronize do
|
298
|
+
@callbacks[:speech_start] << block
|
299
|
+
end
|
300
|
+
end
|
301
|
+
|
302
|
+
# @private calls the speech_start callbacks (with no arguments) once
|
303
|
+
# :START_OF_SPEECH is received.
|
304
|
+
def speech_start!
|
305
|
+
synchronize do
|
306
|
+
@callbacks[:speech_start].each(&:call)
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
##
|
311
|
+
# Register to be notified when speech has ceased to be detected in the
|
312
|
+
# audio stream.
|
313
|
+
#
|
314
|
+
# @yield [callback] The block to be called when speech has ceased to be
|
315
|
+
# detected in the audio stream.
|
316
|
+
#
|
317
|
+
# @example
|
318
|
+
# require "google/cloud/speech"
|
319
|
+
#
|
320
|
+
# speech = Google::Cloud::Speech.new
|
321
|
+
#
|
322
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
323
|
+
#
|
324
|
+
# # register callback for when speech has ended.
|
325
|
+
# stream.on_speech_end do
|
326
|
+
# puts "Speech has ended."
|
327
|
+
# end
|
328
|
+
#
|
329
|
+
# # Stream 5 seconds of audio from the microphone
|
330
|
+
# # Actual implementation of microphone input varies by platform
|
331
|
+
# 5.times do
|
332
|
+
# stream.send MicrophoneInput.read(32000)
|
333
|
+
# end
|
334
|
+
#
|
335
|
+
# stream.stop
|
336
|
+
#
|
337
|
+
def on_speech_end &block
|
338
|
+
synchronize do
|
339
|
+
@callbacks[:speech_end] << block
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
# @private calls the speech_end callbacks (with no arguments) once
|
344
|
+
# :END_OF_SPEECH is received.
|
345
|
+
def speech_end!
|
346
|
+
synchronize do
|
347
|
+
@callbacks[:speech_end].each(&:call)
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
##
|
352
|
+
# Register to be notified when the end of the audio stream has been
|
353
|
+
# reached.
|
354
|
+
#
|
355
|
+
# @yield [callback] The block to be called when the end of the audio
|
356
|
+
# stream has been reached.
|
357
|
+
#
|
358
|
+
# @example
|
359
|
+
# require "google/cloud/speech"
|
360
|
+
#
|
361
|
+
# speech = Google::Cloud::Speech.new
|
362
|
+
#
|
363
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
364
|
+
#
|
365
|
+
# # register callback for when audio has ended.
|
366
|
+
# stream.on_complete do
|
367
|
+
# puts "Audio has ended."
|
368
|
+
# end
|
369
|
+
#
|
370
|
+
# # Stream 5 seconds of audio from the microphone
|
371
|
+
# # Actual implementation of microphone input varies by platform
|
372
|
+
# 5.times do
|
373
|
+
# stream.send MicrophoneInput.read(32000)
|
374
|
+
# end
|
375
|
+
#
|
376
|
+
# stream.stop
|
377
|
+
#
|
378
|
+
def on_complete &block
|
379
|
+
synchronize do
|
380
|
+
@callbacks[:complete] << block
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
# @private calls the complete callbacks (with no arguments) once the
|
385
|
+
# recognition is completed; background_endpointer triggers this on the
|
386
|
+
# reception of :END_OF_AUDIO.
|
387
|
+
def complete!
|
388
|
+
synchronize do
|
389
|
+
@callbacks[:complete].each(&:call)
|
390
|
+
end
|
391
|
+
end
|
392
|
+
|
393
|
+
##
|
394
|
+
# Register to be notified when the server has detected the end of the
|
395
|
+
# user's speech utterance and expects no additional speech. Therefore,
|
396
|
+
# the server will not process additional audio. The client should stop
|
397
|
+
# sending additional audio data. This event only occurs when `utterance`
|
398
|
+
# is `true`.
|
399
|
+
#
|
400
|
+
# @yield [callback] The block to be called when the end of the user's
|
401
|
+
# speech utterance has been detected.
|
402
|
+
#
|
403
|
+
# @example
|
404
|
+
# require "google/cloud/speech"
|
405
|
+
#
|
406
|
+
# speech = Google::Cloud::Speech.new
|
407
|
+
#
|
408
|
+
# stream = speech.stream encoding: :raw,
|
409
|
+
# sample_rate: 16000,
|
410
|
+
# utterance: true
|
411
|
+
#
|
412
|
+
# # register callback for when utterance has occurred.
|
413
|
+
# stream.on_utterance do
|
414
|
+
# puts "Utterance has occurred."
|
415
|
+
# stream.stop
|
416
|
+
# end
|
417
|
+
#
|
418
|
+
# # Stream 5 seconds of audio from the microphone
|
419
|
+
# # Actual implementation of microphone input varies by platform
|
420
|
+
# 5.times do
|
421
|
+
# stream.send MicrophoneInput.read(32000)
|
422
|
+
# end
|
423
|
+
#
|
424
|
+
# stream.stop unless stream.stopped?
|
425
|
+
#
|
426
|
+
def on_utterance &block
|
427
|
+
synchronize do
|
428
|
+
@callbacks[:utterance] << block
|
429
|
+
end
|
430
|
+
end
|
431
|
+
|
432
|
+
# @private calls the utterance callbacks (with no arguments) once
|
433
|
+
# :END_OF_UTTERANCE is received.
|
434
|
+
def utterance!
|
435
|
+
synchronize do
|
436
|
+
@callbacks[:utterance].each(&:call)
|
437
|
+
end
|
438
|
+
end
|
439
|
+
|
440
|
+
##
|
441
|
+
# Register to be notified of an error received during the stream.
|
442
|
+
#
|
443
|
+
# @yield [callback] The block for accessing the error.
|
444
|
+
# @yieldparam [Exception] error The error raised.
|
445
|
+
#
|
446
|
+
# @example
|
447
|
+
# require "google/cloud/speech"
|
448
|
+
#
|
449
|
+
# speech = Google::Cloud::Speech.new
|
450
|
+
#
|
451
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
452
|
+
#
|
453
|
+
# # register callback for when an error is returned
|
454
|
+
# stream.on_error do |error|
|
455
|
+
# puts "The following error occurred while streaming: #{error}"
|
456
|
+
# stream.stop
|
457
|
+
# end
|
458
|
+
#
|
459
|
+
# # Stream 5 seconds of audio from the microphone
|
460
|
+
# # Actual implementation of microphone input varies by platform
|
461
|
+
# 5.times do
|
462
|
+
# stream.send MicrophoneInput.read(32000)
|
463
|
+
# end
|
464
|
+
#
|
465
|
+
# stream.stop
|
466
|
+
#
|
467
|
+
def on_error &block
|
468
|
+
synchronize do
|
469
|
+
@callbacks[:error] << block
|
470
|
+
end
|
471
|
+
end
|
472
|
+
|
473
|
+
# @private returns error object from the stream thread.
|
474
|
+
def error! err
|
475
|
+
synchronize do
|
476
|
+
@callbacks[:error].each { |c| c.call err }
|
477
|
+
end
|
478
|
+
end
|
479
|
+
|
480
|
+
protected
|
481
|
+
|
482
|
+
def background_run
|
483
|
+
response_enum = @service.recognize_stream @request_queue.each_item
|
484
|
+
response_enum.each do |response|
|
485
|
+
begin
|
486
|
+
background_results response
|
487
|
+
background_endpointer response.endpointer_type
|
488
|
+
background_error response.error
|
489
|
+
rescue => e
|
490
|
+
error! Google::Cloud::Error.from_error(e)
|
491
|
+
end
|
492
|
+
end
|
493
|
+
Thread.pass
|
494
|
+
end
|
495
|
+
|
496
|
+
def background_results response
|
497
|
+
# Handle the results (StreamingRecognitionResult)
|
498
|
+
return unless response.results && response.results.any?
|
499
|
+
|
500
|
+
final_grpc, interim_grpcs = *response.results
|
501
|
+
if final_grpc && final_grpc.is_final
|
502
|
+
add_result! response.result_index, final_grpc
|
503
|
+
else
|
504
|
+
# all results are interim
|
505
|
+
interim_grpcs = response.results
|
506
|
+
end
|
507
|
+
|
508
|
+
# convert to Speech object from GRPC object
|
509
|
+
interim_results = Array(interim_grpcs).map do |grpc|
|
510
|
+
InterimResult.from_grpc grpc
|
511
|
+
end
|
512
|
+
# callback for interim results received
|
513
|
+
interim! interim_results if interim_results.any?
|
514
|
+
end
|
515
|
+
|
516
|
+
def background_endpointer endpointer
|
517
|
+
# Handle the endpointer by raising events
|
518
|
+
if endpointer == :START_OF_SPEECH
|
519
|
+
speech_start!
|
520
|
+
elsif endpointer == :END_OF_SPEECH
|
521
|
+
speech_end!
|
522
|
+
elsif endpointer == :END_OF_AUDIO
|
523
|
+
# TODO: do we automatically call stop here?
|
524
|
+
complete!
|
525
|
+
elsif endpointer == :END_OF_UTTERANCE
|
526
|
+
# TODO: do we automatically call stop here?
|
527
|
+
utterance!
|
528
|
+
end
|
529
|
+
end
|
530
|
+
|
531
|
+
def background_error error
|
532
|
+
return if error.nil?
|
533
|
+
|
534
|
+
require "grpc/errors"
|
535
|
+
fail GRPC::BadStatus.new(error.code, error.message)
|
536
|
+
end
|
537
|
+
|
538
|
+
# @private
|
539
|
+
class EnumeratorQueue
|
540
|
+
extend Forwardable
|
541
|
+
def_delegators :@q, :push
|
542
|
+
|
543
|
+
# @private
|
544
|
+
def initialize sentinel
|
545
|
+
@q = Queue.new
|
546
|
+
@sentinel = sentinel
|
547
|
+
end
|
548
|
+
|
549
|
+
# @private
|
550
|
+
def each_item
|
551
|
+
return enum_for(:each_item) unless block_given?
|
552
|
+
loop do
|
553
|
+
r = @q.pop
|
554
|
+
break if r.equal? @sentinel
|
555
|
+
fail r if r.is_a? Exception
|
556
|
+
yield r
|
557
|
+
end
|
558
|
+
end
|
559
|
+
end
|
560
|
+
end
|
561
|
+
end
|
562
|
+
end
|
563
|
+
end
|