google-cloud-speech 0.29.0 → 0.30.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +2 -1
- data/LICENSE +1 -1
- data/README.md +69 -43
- data/lib/google/cloud/speech.rb +94 -252
- data/lib/google/cloud/speech/v1.rb +11 -1
- data/lib/google/cloud/speech/v1/cloud_speech_services_pb.rb +1 -1
- data/lib/google/cloud/speech/{version.rb → v1/credentials.rb} +12 -2
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +2 -2
- data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +92 -0
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/overview.rb +1 -1
- data/lib/google/cloud/speech/v1/helpers.rb +93 -0
- data/lib/google/cloud/speech/v1/speech_client.rb +26 -49
- data/lib/google/cloud/speech/v1/speech_client_config.json +5 -5
- data/lib/google/cloud/speech/v1/stream.rb +614 -0
- data/lib/google/cloud/speech/v1p1beta1.rb +126 -0
- data/lib/google/cloud/speech/v1p1beta1/cloud_speech_pb.rb +175 -0
- data/lib/google/cloud/speech/v1p1beta1/cloud_speech_services_pb.rb +54 -0
- data/lib/google/cloud/speech/v1p1beta1/credentials.rb +32 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +625 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +92 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +124 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +90 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +83 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +73 -0
- data/lib/google/cloud/speech/v1p1beta1/helpers.rb +93 -0
- data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +322 -0
- data/lib/google/cloud/speech/v1p1beta1/speech_client_config.json +41 -0
- data/lib/google/cloud/speech/v1p1beta1/stream.rb +614 -0
- metadata +29 -120
- data/lib/google-cloud-speech.rb +0 -142
- data/lib/google/cloud/speech/audio.rb +0 -330
- data/lib/google/cloud/speech/convert.rb +0 -46
- data/lib/google/cloud/speech/credentials.rb +0 -57
- data/lib/google/cloud/speech/operation.rb +0 -262
- data/lib/google/cloud/speech/project.rb +0 -651
- data/lib/google/cloud/speech/result.rb +0 -240
- data/lib/google/cloud/speech/service.rb +0 -121
- data/lib/google/cloud/speech/stream.rb +0 -564
@@ -1,240 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/cloud/speech/v1"
|
17
|
-
require "google/cloud/speech/convert"
|
18
|
-
|
19
|
-
module Google
|
20
|
-
module Cloud
|
21
|
-
module Speech
|
22
|
-
##
|
23
|
-
# # Result
|
24
|
-
#
|
25
|
-
# A speech recognition result corresponding to a portion of the audio.
|
26
|
-
#
|
27
|
-
# See {Project#recognize} and {Operation#results}.
|
28
|
-
#
|
29
|
-
# @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.V1#google.cloud.speech.V1.SpeechRecognitionResult
|
30
|
-
# SpeechRecognitionResult
|
31
|
-
#
|
32
|
-
# @attr_reader [String] transcript Transcript text representing the words
|
33
|
-
# that the user spoke.
|
34
|
-
# @attr_reader [Float] confidence The confidence estimate between 0.0 and
|
35
|
-
# 1.0. A higher number means the system is more confident that the
|
36
|
-
# recognition is correct. This field is typically provided only for the
|
37
|
-
# top hypothesis. A value of 0.0 is a sentinel value indicating
|
38
|
-
# confidence was not set.
|
39
|
-
# @attr_reader [Array<Result::Word>] words A list of words with additional
|
40
|
-
# information about each word. Currently, the only additional
|
41
|
-
# information provided is the start and end time offsets. Available
|
42
|
-
# when using the `words` argument in relevant methods.
|
43
|
-
# @attr_reader [Array<Result::Alternative>] alternatives Additional
|
44
|
-
# recognition hypotheses (up to the value specified in
|
45
|
-
# `max_alternatives`). The server may return fewer than
|
46
|
-
# `max_alternatives`.
|
47
|
-
#
|
48
|
-
# @example
|
49
|
-
# require "google/cloud/speech"
|
50
|
-
#
|
51
|
-
# speech = Google::Cloud::Speech.new
|
52
|
-
#
|
53
|
-
# audio = speech.audio "path/to/audio.raw",
|
54
|
-
# encoding: :linear16,
|
55
|
-
# language: "en-US",
|
56
|
-
# sample_rate: 16000
|
57
|
-
# results = audio.recognize
|
58
|
-
#
|
59
|
-
# result = results.first
|
60
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
61
|
-
# result.confidence #=> 0.9826789498329163
|
62
|
-
#
|
63
|
-
class Result
|
64
|
-
attr_reader :transcript, :confidence, :words, :alternatives
|
65
|
-
|
66
|
-
##
|
67
|
-
# @private Creates a new Result instance.
|
68
|
-
def initialize transcript, confidence, words = [], alternatives = []
|
69
|
-
@transcript = transcript
|
70
|
-
@confidence = confidence
|
71
|
-
@words = words
|
72
|
-
@alternatives = alternatives
|
73
|
-
end
|
74
|
-
|
75
|
-
##
|
76
|
-
# @private New Result from a SpeechRecognitionResult object.
|
77
|
-
def self.from_grpc grpc
|
78
|
-
head, *tail = *grpc.alternatives
|
79
|
-
return nil if head.nil?
|
80
|
-
words = Array(head.words).map do |w|
|
81
|
-
Word.new w.word, Convert.duration_to_number(w.start_time),
|
82
|
-
Convert.duration_to_number(w.end_time)
|
83
|
-
end
|
84
|
-
alternatives = tail.map do |alt|
|
85
|
-
Alternative.new alt.transcript, alt.confidence
|
86
|
-
end
|
87
|
-
new head.transcript, head.confidence, words, alternatives
|
88
|
-
end
|
89
|
-
|
90
|
-
##
|
91
|
-
# Word-specific information for recognized words. Currently, the only
|
92
|
-
# additional information provided is the start and end time offsets.
|
93
|
-
# Available when using the `words` argument in relevant methods.
|
94
|
-
#
|
95
|
-
# @attr_reader [String] word The word corresponding to this set of
|
96
|
-
# information.
|
97
|
-
# @attr_reader [Numeric] start_time Time offset relative to the
|
98
|
-
# beginning of the audio, and corresponding to the start of the spoken
|
99
|
-
# word. This field is only set if `words` was specified. This is an
|
100
|
-
# experimental feature and the accuracy of the time offset can vary.
|
101
|
-
# @attr_reader [Numeric] end_time Time offset relative to the
|
102
|
-
# beginning of the audio, and corresponding to the end of the spoken
|
103
|
-
# word. This field is only set if `words` was specified. This is an
|
104
|
-
# experimental feature and the accuracy of the time offset can vary.
|
105
|
-
class Word
|
106
|
-
attr_reader :word, :start_time, :end_time
|
107
|
-
alias to_str word
|
108
|
-
|
109
|
-
##
|
110
|
-
# @private Creates a new Result::Word instance.
|
111
|
-
def initialize word, start_time, end_time
|
112
|
-
@word = word
|
113
|
-
@start_time = start_time
|
114
|
-
@end_time = end_time
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
##
|
119
|
-
# # Result::Alternative
|
120
|
-
#
|
121
|
-
# An additional recognition hypothesis for a portion of the audio.
|
122
|
-
#
|
123
|
-
# @attr_reader [String] transcript Transcript text representing the
|
124
|
-
# words that the user spoke.
|
125
|
-
# @attr_reader [Float] confidence The confidence estimate between 0.0
|
126
|
-
# and 1.0. A higher number means the system is more confident that the
|
127
|
-
# recognition is correct. This field is typically provided only for
|
128
|
-
# the top hypothesis. A value of 0.0 is a sentinel value indicating
|
129
|
-
# confidence was not set.
|
130
|
-
#
|
131
|
-
# @example
|
132
|
-
# require "google/cloud/speech"
|
133
|
-
#
|
134
|
-
# speech = Google::Cloud::Speech.new
|
135
|
-
#
|
136
|
-
# audio = speech.audio "path/to/audio.raw",
|
137
|
-
# encoding: :linear16,
|
138
|
-
# language: "en-US",
|
139
|
-
# sample_rate: 16000
|
140
|
-
# results = audio.recognize
|
141
|
-
#
|
142
|
-
# result = results.first
|
143
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
144
|
-
# result.confidence #=> 0.9826789498329163
|
145
|
-
# alternative = result.alternatives.first
|
146
|
-
# alternative.transcript #=> "how old is the Brooklyn brim"
|
147
|
-
# alternative.confidence #=> 0.22030000388622284
|
148
|
-
#
|
149
|
-
class Alternative
|
150
|
-
attr_reader :transcript, :confidence
|
151
|
-
|
152
|
-
##
|
153
|
-
# @private Creates a new Result::Alternative instance.
|
154
|
-
def initialize transcript, confidence
|
155
|
-
@transcript = transcript
|
156
|
-
@confidence = confidence
|
157
|
-
end
|
158
|
-
end
|
159
|
-
end
|
160
|
-
|
161
|
-
##
|
162
|
-
# # InterimResult
|
163
|
-
#
|
164
|
-
# A streaming speech recognition result corresponding to a portion of the
|
165
|
-
# audio that is currently being processed.
|
166
|
-
#
|
167
|
-
# See {Project#stream} and {Stream#on_interim}.
|
168
|
-
#
|
169
|
-
# @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.V1#google.cloud.speech.V1.SpeechRecognitionResult
|
170
|
-
# SpeechRecognitionResult
|
171
|
-
# @see https://cloud.google.com/speech/reference/rpc/google.cloud.speech.V1#google.cloud.speech.V1.StreamingRecognitionResult
|
172
|
-
# StreamingRecognitionResult
|
173
|
-
#
|
174
|
-
# @attr_reader [String] transcript Transcript text representing the words
|
175
|
-
# that the user spoke.
|
176
|
-
# @attr_reader [Float] confidence The confidence estimate between 0.0 and
|
177
|
-
# 1.0. A higher number means the system is more confident that the
|
178
|
-
# recognition is correct. This field is typically provided only for the
|
179
|
-
# top hypothesis. A value of 0.0 is a sentinel value indicating
|
180
|
-
# confidence was not set.
|
181
|
-
# @attr_reader [Float] stability An estimate of the probability that the
|
182
|
-
# recognizer will not change its guess about this interim result. Values
|
183
|
-
# range from 0.0 (completely unstable) to 1.0 (completely stable). Note
|
184
|
-
# that this is not the same as confidence, which estimates the
|
185
|
-
# probability that a recognition result is correct.
|
186
|
-
# @attr_reader [Array<Result::Alternative>] alternatives Additional
|
187
|
-
# recognition hypotheses (up to the value specified in
|
188
|
-
# `max_alternatives`).
|
189
|
-
#
|
190
|
-
# @example
|
191
|
-
# require "google/cloud/speech"
|
192
|
-
#
|
193
|
-
# speech = Google::Cloud::Speech.new
|
194
|
-
#
|
195
|
-
# stream = speech.stream encoding: :linear16,
|
196
|
-
# language: "en-US",
|
197
|
-
# sample_rate: 16000
|
198
|
-
#
|
199
|
-
# # register callback for when an interim result is returned
|
200
|
-
# stream.on_interim do |final_results, interim_results|
|
201
|
-
# interim_result = interim_results.first
|
202
|
-
# puts interim_result.transcript # "how old is the Brooklyn Bridge"
|
203
|
-
# puts interim_result.confidence # 0.9826789498329163
|
204
|
-
# puts interim_result.stability # 0.8999
|
205
|
-
# end
|
206
|
-
#
|
207
|
-
# # Stream 5 seconds of audio from the microphone
|
208
|
-
# # Actual implementation of microphone input varies by platform
|
209
|
-
# 5.times do
|
210
|
-
# stream.send MicrophoneInput.read(32000)
|
211
|
-
# end
|
212
|
-
#
|
213
|
-
# stream.stop
|
214
|
-
#
|
215
|
-
class InterimResult
|
216
|
-
attr_reader :transcript, :confidence, :stability, :alternatives
|
217
|
-
|
218
|
-
##
|
219
|
-
# @private Creates a new InterimResult instance.
|
220
|
-
def initialize transcript, confidence, stability, alternatives = []
|
221
|
-
@transcript = transcript
|
222
|
-
@confidence = confidence
|
223
|
-
@stability = stability
|
224
|
-
@alternatives = alternatives
|
225
|
-
end
|
226
|
-
|
227
|
-
##
|
228
|
-
# @private New InterimResult from a StreamingRecognitionResult object.
|
229
|
-
def self.from_grpc grpc
|
230
|
-
head, *tail = *grpc.alternatives
|
231
|
-
return nil if head.nil?
|
232
|
-
alternatives = tail.map do |alt|
|
233
|
-
Result::Alternative.new alt.transcript, alt.confidence
|
234
|
-
end
|
235
|
-
new head.transcript, head.confidence, grpc.stability, alternatives
|
236
|
-
end
|
237
|
-
end
|
238
|
-
end
|
239
|
-
end
|
240
|
-
end
|
@@ -1,121 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/cloud/errors"
|
17
|
-
require "google/cloud/speech/credentials"
|
18
|
-
require "google/cloud/speech/version"
|
19
|
-
require "google/cloud/speech/v1"
|
20
|
-
|
21
|
-
module Google
|
22
|
-
module Cloud
|
23
|
-
module Speech
|
24
|
-
##
|
25
|
-
# @private Represents the gRPC Speech service, including all the API
|
26
|
-
# methods.
|
27
|
-
class Service
|
28
|
-
attr_accessor :project, :credentials, :timeout, :client_config
|
29
|
-
|
30
|
-
##
|
31
|
-
# LRO client configured for Speech
|
32
|
-
class SpeechOperationsClient < Google::Longrunning::OperationsClient
|
33
|
-
SERVICE_ADDRESS = V1::SpeechClient::SERVICE_ADDRESS
|
34
|
-
end
|
35
|
-
|
36
|
-
##
|
37
|
-
# Creates a new Service instance.
|
38
|
-
def initialize project, credentials, timeout: nil, client_config: nil
|
39
|
-
@project = project
|
40
|
-
@credentials = credentials
|
41
|
-
@timeout = timeout
|
42
|
-
@client_config = client_config || {}
|
43
|
-
end
|
44
|
-
|
45
|
-
def service
|
46
|
-
return mocked_service if mocked_service
|
47
|
-
@service ||= \
|
48
|
-
V1::SpeechClient.new(
|
49
|
-
credentials: credentials,
|
50
|
-
timeout: timeout,
|
51
|
-
client_config: client_config,
|
52
|
-
lib_name: "gccl",
|
53
|
-
lib_version: Google::Cloud::Speech::VERSION
|
54
|
-
)
|
55
|
-
end
|
56
|
-
attr_accessor :mocked_service
|
57
|
-
|
58
|
-
def ops
|
59
|
-
return mocked_ops if mocked_ops
|
60
|
-
@ops ||= \
|
61
|
-
SpeechOperationsClient.new(
|
62
|
-
credentials: credentials,
|
63
|
-
timeout: timeout,
|
64
|
-
client_config: client_config,
|
65
|
-
lib_name: "gccl",
|
66
|
-
lib_version: Google::Cloud::Speech::VERSION
|
67
|
-
)
|
68
|
-
end
|
69
|
-
attr_accessor :mocked_ops
|
70
|
-
|
71
|
-
def recognize_sync audio, config
|
72
|
-
execute do
|
73
|
-
service.recognize config, audio, options: default_options
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
def recognize_async audio, config
|
78
|
-
execute do
|
79
|
-
service.long_running_recognize \
|
80
|
-
config, audio, options: default_options
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def recognize_stream request_enum
|
85
|
-
# No need to handle errors here, they are handled in the enum
|
86
|
-
service.streaming_recognize request_enum, options: default_options
|
87
|
-
end
|
88
|
-
|
89
|
-
def get_op name
|
90
|
-
execute do
|
91
|
-
Google::Gax::Operation.new \
|
92
|
-
ops.get_operation(name), ops,
|
93
|
-
V1::LongRunningRecognizeResponse,
|
94
|
-
V1::LongRunningRecognizeMetadata
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
def inspect
|
99
|
-
"#{self.class}(#{@project})"
|
100
|
-
end
|
101
|
-
|
102
|
-
protected
|
103
|
-
|
104
|
-
def default_headers
|
105
|
-
{ "google-cloud-resource-prefix" => "projects/#{@project}" }
|
106
|
-
end
|
107
|
-
|
108
|
-
def default_options
|
109
|
-
Google::Gax::CallOptions.new kwargs: default_headers
|
110
|
-
end
|
111
|
-
|
112
|
-
def execute
|
113
|
-
yield
|
114
|
-
rescue Google::Gax::GaxError => e
|
115
|
-
# GaxError wraps BadStatus, but exposes it as #cause
|
116
|
-
raise Google::Cloud::Error.from_error(e.cause)
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
@@ -1,564 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/cloud/speech/v1"
|
17
|
-
require "google/cloud/speech/result"
|
18
|
-
require "monitor"
|
19
|
-
require "forwardable"
|
20
|
-
|
21
|
-
module Google
|
22
|
-
module Cloud
|
23
|
-
module Speech
|
24
|
-
##
|
25
|
-
# # Stream
|
26
|
-
#
|
27
|
-
# A resource that represents the streaming requests and responses.
|
28
|
-
#
|
29
|
-
# @example
|
30
|
-
# require "google/cloud/speech"
|
31
|
-
#
|
32
|
-
# speech = Google::Cloud::Speech.new
|
33
|
-
#
|
34
|
-
# stream = speech.stream encoding: :linear16,
|
35
|
-
# language: "en-US",
|
36
|
-
# sample_rate: 16000
|
37
|
-
#
|
38
|
-
# # Stream 5 seconds of audio from the microphone
|
39
|
-
# # Actual implementation of microphone input varies by platform
|
40
|
-
# 5.times do
|
41
|
-
# stream.send MicrophoneInput.read(32000)
|
42
|
-
# end
|
43
|
-
#
|
44
|
-
# stream.stop
|
45
|
-
# stream.wait_until_complete!
|
46
|
-
#
|
47
|
-
# results = stream.results
|
48
|
-
# result = results.first
|
49
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
50
|
-
# result.confidence #=> 0.9826789498329163
|
51
|
-
#
|
52
|
-
class Stream
|
53
|
-
include MonitorMixin
|
54
|
-
##
|
55
|
-
# @private Creates a new Speech Stream instance.
|
56
|
-
# This must always be private, since it may change as the implementation
|
57
|
-
# changes over time.
|
58
|
-
def initialize service, streaming_recognize_request
|
59
|
-
@service = service
|
60
|
-
@streaming_recognize_request = streaming_recognize_request
|
61
|
-
@results = []
|
62
|
-
@callbacks = Hash.new { |h, k| h[k] = [] }
|
63
|
-
super() # to init MonitorMixin
|
64
|
-
end
|
65
|
-
|
66
|
-
##
|
67
|
-
# Starts the stream. The stream will be started in the first #send call.
|
68
|
-
def start
  # The check-and-assign of @request_queue must happen under the monitor:
  # #send calls #start before taking the lock, so two threads issuing the
  # first #send concurrently could otherwise each create a queue and spawn
  # a background thread. The monitor is reentrant, so callers that already
  # hold it are unaffected.
  synchronize do
    return if @request_queue

    @request_queue = EnumeratorQueue.new(self)
    # The first item on the queue is the initial streaming request.
    @request_queue.push @streaming_recognize_request

    Thread.new { background_run }
  end
end
|
75
|
-
|
76
|
-
##
|
77
|
-
# Checks if the stream has been started.
|
78
|
-
#
|
79
|
-
# @return [Boolean] `true` when started, `false` otherwise.
|
80
|
-
def started?
|
81
|
-
synchronize do
|
82
|
-
!(!@request_queue)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
##
|
87
|
-
# Sends audio content to the server.
|
88
|
-
#
|
89
|
-
# @param [String] bytes A string of binary audio data to be recognized.
|
90
|
-
# The data should be encoded as `ASCII-8BIT`.
|
91
|
-
#
|
92
|
-
# @example
|
93
|
-
# require "google/cloud/speech"
|
94
|
-
#
|
95
|
-
# speech = Google::Cloud::Speech.new
|
96
|
-
#
|
97
|
-
# audio = speech.audio "path/to/audio.raw"
|
98
|
-
#
|
99
|
-
# stream = speech.stream encoding: :linear16,
|
100
|
-
# language: "en-US",
|
101
|
-
# sample_rate: 16000
|
102
|
-
#
|
103
|
-
# # Stream 5 seconds of audio from the microphone
|
104
|
-
# # Actual implementation of microphone input varies by platform
|
105
|
-
# 5.times do
|
106
|
-
# stream.send MicrophoneInput.read(32000)
|
107
|
-
# end
|
108
|
-
#
|
109
|
-
# stream.stop
|
110
|
-
# stream.wait_until_complete!
|
111
|
-
#
|
112
|
-
# results = stream.results
|
113
|
-
# result = results.first
|
114
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
115
|
-
# result.confidence #=> 0.9826789498329163
|
116
|
-
#
|
117
|
-
def send bytes
  start # lazily call start if the stream wasn't started yet
  # TODO: do not send if stopped?
  synchronize do
    # String#b returns a copy tagged ASCII-8BIT without touching the bytes.
    # String#encode would instead try to transcode, which raises for any
    # non-binary string containing bytes >= 0x80 (for example audio that was
    # read from a file opened in text mode).
    req = V1::StreamingRecognizeRequest.new(
      audio_content: bytes.b
    )
    @request_queue.push req
  end
end
|
127
|
-
|
128
|
-
##
|
129
|
-
# Stops the stream. Signals to the server that no more data will be
|
130
|
-
# sent.
|
131
|
-
def stop
|
132
|
-
synchronize do
|
133
|
-
return if @request_queue.nil?
|
134
|
-
@request_queue.push self
|
135
|
-
@stopped = true
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
##
|
140
|
-
# Checks if the stream has been stopped.
|
141
|
-
#
|
142
|
-
# @return [Boolean] `true` when stopped, `false` otherwise.
|
143
|
-
def stopped?
  synchronize do
    # @stopped stays unset (nil) until #stop assigns true; coerce it so the
    # predicate returns the documented `false` instead of `nil`, matching
    # the strict boolean returned by #started?.
    @stopped ? true : false
  end
end
|
148
|
-
|
149
|
-
##
|
150
|
-
# The speech recognition results for the audio.
|
151
|
-
#
|
152
|
-
# @return [Array<Result>] The transcribed text of audio recognized.
|
153
|
-
#
|
154
|
-
# @example
|
155
|
-
# require "google/cloud/speech"
|
156
|
-
#
|
157
|
-
# speech = Google::Cloud::Speech.new
|
158
|
-
#
|
159
|
-
# stream = speech.stream encoding: :linear16,
|
160
|
-
# language: "en-US",
|
161
|
-
# sample_rate: 16000
|
162
|
-
#
|
163
|
-
# # Stream 5 seconds of audio from the microphone
|
164
|
-
# # Actual implementation of microphone input varies by platform
|
165
|
-
# 5.times do
|
166
|
-
# stream.send MicrophoneInput.read(32000)
|
167
|
-
# end
|
168
|
-
#
|
169
|
-
# stream.stop
|
170
|
-
#
|
171
|
-
# results = stream.results
|
172
|
-
# results.each do |result|
|
173
|
-
# puts result.transcript
|
174
|
-
# puts result.confidence
|
175
|
-
# end
|
176
|
-
#
|
177
|
-
def results
|
178
|
-
synchronize do
|
179
|
-
@results
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
|
-
##
|
184
|
-
# Whether all speech recognition results have been returned.
|
185
|
-
#
|
186
|
-
# @return [Boolean] All speech recognition results have been returned.
|
187
|
-
#
|
188
|
-
# @example
|
189
|
-
# require "google/cloud/speech"
|
190
|
-
#
|
191
|
-
# speech = Google::Cloud::Speech.new
|
192
|
-
#
|
193
|
-
# stream = speech.stream encoding: :linear16,
|
194
|
-
# language: "en-US",
|
195
|
-
# sample_rate: 16000
|
196
|
-
#
|
197
|
-
# # Stream 5 seconds of audio from the microphone
|
198
|
-
# # Actual implementation of microphone input varies by platform
|
199
|
-
# 5.times do
|
200
|
-
# stream.send MicrophoneInput.read(32000)
|
201
|
-
# end
|
202
|
-
#
|
203
|
-
# stream.stop
|
204
|
-
#
|
205
|
-
# stream.wait_until_complete!
|
206
|
-
# stream.complete? #=> true
|
207
|
-
#
|
208
|
-
# results = stream.results
|
209
|
-
# results.each do |result|
|
210
|
-
# puts result.transcript
|
211
|
-
# puts result.confidence
|
212
|
-
# end
|
213
|
-
#
|
214
|
-
def complete?
  synchronize do
    # @complete is only ever assigned (to true) by #pass_complete!, so it is
    # nil beforehand; coerce it so the predicate returns a real Boolean as
    # documented.
    @complete ? true : false
  end
end
|
219
|
-
|
220
|
-
##
|
221
|
-
# Blocks until all speech recognition results have been returned.
|
222
|
-
#
|
223
|
-
# @example
|
224
|
-
# require "google/cloud/speech"
|
225
|
-
#
|
226
|
-
# speech = Google::Cloud::Speech.new
|
227
|
-
#
|
228
|
-
# stream = speech.stream encoding: :linear16,
|
229
|
-
# language: "en-US",
|
230
|
-
# sample_rate: 16000
|
231
|
-
#
|
232
|
-
# # Stream 5 seconds of audio from the microphone
|
233
|
-
# # Actual implementation of microphone input varies by platform
|
234
|
-
# 5.times do
|
235
|
-
# stream.send MicrophoneInput.read(32000)
|
236
|
-
# end
|
237
|
-
#
|
238
|
-
# stream.stop
|
239
|
-
#
|
240
|
-
# stream.wait_until_complete!
|
241
|
-
# stream.complete? #=> true
|
242
|
-
#
|
243
|
-
# results = stream.results
|
244
|
-
# results.each do |result|
|
245
|
-
# puts result.transcript
|
246
|
-
# puts result.confidence
|
247
|
-
# end
|
248
|
-
#
|
249
|
-
def wait_until_complete!
|
250
|
-
complete_check = nil
|
251
|
-
synchronize { complete_check = @complete }
|
252
|
-
while complete_check.nil?
|
253
|
-
sleep 1
|
254
|
-
synchronize { complete_check = @complete }
|
255
|
-
end
|
256
|
-
end
|
257
|
-
|
258
|
-
##
|
259
|
-
# Register to be notified on the reception of an interim result.
|
260
|
-
#
|
261
|
-
# @yield [callback] The block for accessing final and interim results.
|
262
|
-
# @yieldparam [Array<Result>] final_results The final results.
|
263
|
-
# @yieldparam [Array<InterimResult>] interim_results The interim results.
|
264
|
-
#
|
265
|
-
# @example
|
266
|
-
# require "google/cloud/speech"
|
267
|
-
#
|
268
|
-
# speech = Google::Cloud::Speech.new
|
269
|
-
#
|
270
|
-
# stream = speech.stream encoding: :linear16,
|
271
|
-
# language: "en-US",
|
272
|
-
# sample_rate: 16000
|
273
|
-
#
|
274
|
-
# # register callback for when an interim result is returned
|
275
|
-
# stream.on_interim do |final_results, interim_results|
|
276
|
-
# interim_result = interim_results.first
|
277
|
-
# puts interim_result.transcript # "how old is the Brooklyn Bridge"
|
278
|
-
# puts interim_result.confidence # 0.9826789498329163
|
279
|
-
# puts interim_result.stability # 0.8999
|
280
|
-
# end
|
281
|
-
#
|
282
|
-
# # Stream 5 seconds of audio from the microphone
|
283
|
-
# # Actual implementation of microphone input varies by platform
|
284
|
-
# 5.times do
|
285
|
-
# stream.send MicrophoneInput.read(32000)
|
286
|
-
# end
|
287
|
-
#
|
288
|
-
# stream.stop
|
289
|
-
#
|
290
|
-
def on_interim &block
|
291
|
-
synchronize do
|
292
|
-
@callbacks[:interim] << block
|
293
|
-
end
|
294
|
-
end
|
295
|
-
|
296
|
-
##
|
297
|
-
# @private yields two arguments, all final results and the
|
298
|
-
# non-final/incomplete result
|
299
|
-
def pass_interim! interim_results
|
300
|
-
synchronize do
|
301
|
-
@callbacks[:interim].each { |c| c.call results, interim_results }
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
|
-
##
|
306
|
-
# Register to be notified on the reception of a final result.
|
307
|
-
#
|
308
|
-
# @yield [callback] The block for accessing final results.
|
309
|
-
# @yieldparam [Array<Result>] results The final results.
|
310
|
-
#
|
311
|
-
# @example
|
312
|
-
# require "google/cloud/speech"
|
313
|
-
#
|
314
|
-
# speech = Google::Cloud::Speech.new
|
315
|
-
#
|
316
|
-
# stream = speech.stream encoding: :linear16,
|
317
|
-
# language: "en-US",
|
318
|
-
# sample_rate: 16000
|
319
|
-
#
|
320
|
-
# # Stream 5 seconds of audio from the microphone
|
321
|
-
# # Actual implementation of microphone input varies by platform
|
322
|
-
# 5.times do
|
323
|
-
# stream.send MicrophoneInput.read(32000)
|
324
|
-
# end
|
325
|
-
#
|
326
|
-
# stream.stop
|
327
|
-
# stream.wait_until_complete!
|
328
|
-
#
|
329
|
-
# results = stream.results
|
330
|
-
# result = results.first
|
331
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
332
|
-
# result.confidence #=> 0.9826789498329163
|
333
|
-
#
|
334
|
-
def on_result &block
|
335
|
-
synchronize do
|
336
|
-
@callbacks[:result] << block
|
337
|
-
end
|
338
|
-
end
|
339
|
-
|
340
|
-
##
|
341
|
-
# @private add a result object, and call the callbacks
|
342
|
-
def pass_result! result_grpc
  # Result.from_grpc returns nil when the gRPC result carries no
  # alternatives. Recording that nil would put a non-Result entry into
  # #results and hand it to every callback, so such results are ignored.
  result = Result.from_grpc result_grpc
  return if result.nil?

  synchronize do
    @results << result
    # Callbacks receive the full list of final results gathered so far.
    @callbacks[:result].each { |c| c.call @results }
  end
end
|
348
|
-
|
349
|
-
##
|
350
|
-
# Register to be notified when the end of the audio stream has been
|
351
|
-
# reached.
|
352
|
-
#
|
353
|
-
# @yield [callback] The block to be called when the end of the audio
|
354
|
-
# stream has been reached.
|
355
|
-
#
|
356
|
-
# @example
|
357
|
-
# require "google/cloud/speech"
|
358
|
-
#
|
359
|
-
# speech = Google::Cloud::Speech.new
|
360
|
-
#
|
361
|
-
# stream = speech.stream encoding: :linear16,
|
362
|
-
# language: "en-US",
|
363
|
-
# sample_rate: 16000
|
364
|
-
#
|
365
|
-
# # register callback for when stream has ended.
|
366
|
-
# stream.on_complete do
|
367
|
-
# puts "Stream has ended."
|
368
|
-
# end
|
369
|
-
#
|
370
|
-
# # Stream 5 seconds of audio from the microphone
|
371
|
-
# # Actual implementation of microphone input varies by platform
|
372
|
-
# 5.times do
|
373
|
-
# stream.send MicrophoneInput.read(32000)
|
374
|
-
# end
|
375
|
-
#
|
376
|
-
# stream.stop
|
377
|
-
#
|
378
|
-
def on_complete &block
|
379
|
-
synchronize do
|
380
|
-
@callbacks[:complete] << block
|
381
|
-
end
|
382
|
-
end
|
383
|
-
|
384
|
-
##
|
385
|
-
# @private yields when the end of the audio stream has been reached.
|
386
|
-
def pass_complete!
|
387
|
-
synchronize do
|
388
|
-
@complete = true
|
389
|
-
@callbacks[:complete].each(&:call)
|
390
|
-
end
|
391
|
-
end
|
392
|
-
|
393
|
-
##
|
394
|
-
# Register to be notified when the server has detected the end of the
|
395
|
-
# user's speech utterance and expects no additional speech. Therefore,
|
396
|
-
# the server will not process additional audio. The client should stop
|
397
|
-
# sending additional audio data. This event only occurs when `utterance`
|
398
|
-
# is `true`.
|
399
|
-
#
|
400
|
-
# @yield [callback] The block to be called when the server has detected
|
401
|
-
# the end of the user's speech utterance.
|
402
|
-
#
|
403
|
-
# @example
|
404
|
-
# require "google/cloud/speech"
|
405
|
-
#
|
406
|
-
# speech = Google::Cloud::Speech.new
|
407
|
-
#
|
408
|
-
# stream = speech.stream encoding: :linear16,
|
409
|
-
# language: "en-US",
|
410
|
-
# sample_rate: 16000,
|
411
|
-
# utterance: true
|
412
|
-
#
|
413
|
-
# # register callback for when utterance has occurred.
|
414
|
-
# stream.on_utterance do
|
415
|
-
# puts "Utterance has occurred."
|
416
|
-
# stream.stop
|
417
|
-
# end
|
418
|
-
#
|
419
|
-
# # Stream 5 seconds of audio from the microphone
|
420
|
-
# # Actual implementation of microphone input varies by platform
|
421
|
-
# 5.times do
|
422
|
-
# stream.send MicrophoneInput.read(32000)
|
423
|
-
# end
|
424
|
-
#
|
425
|
-
# stream.stop unless stream.stopped?
|
426
|
-
#
|
427
|
-
def on_utterance &block
|
428
|
-
synchronize do
|
429
|
-
@callbacks[:utterance] << block
|
430
|
-
end
|
431
|
-
end
|
432
|
-
|
433
|
-
##
|
434
|
-
# @private invokes the registered utterance callbacks once
|
435
|
-
# :END_OF_SINGLE_UTTERANCE is received.
|
436
|
-
def pass_utterance!
|
437
|
-
synchronize do
|
438
|
-
@callbacks[:utterance].each(&:call)
|
439
|
-
end
|
440
|
-
end
|
441
|
-
|
442
|
-
##
|
443
|
-
# Register to be notified of an error received during the stream.
|
444
|
-
#
|
445
|
-
# @yield [callback] The block for handling the error.
|
446
|
-
# @yieldparam [Exception] error The error raised.
|
447
|
-
#
|
448
|
-
# @example
|
449
|
-
# require "google/cloud/speech"
|
450
|
-
#
|
451
|
-
# speech = Google::Cloud::Speech.new
|
452
|
-
#
|
453
|
-
# stream = speech.stream encoding: :linear16,
|
454
|
-
# language: "en-US",
|
455
|
-
# sample_rate: 16000
|
456
|
-
#
|
457
|
-
# # register callback for when an error is returned
|
458
|
-
# stream.on_error do |error|
|
459
|
-
# puts "The following error occurred while streaming: #{error}"
|
460
|
-
# stream.stop
|
461
|
-
# end
|
462
|
-
#
|
463
|
-
# # Stream 5 seconds of audio from the microphone
|
464
|
-
# # Actual implementation of microphone input varies by platform
|
465
|
-
# 5.times do
|
466
|
-
# stream.send MicrophoneInput.read(32000)
|
467
|
-
# end
|
468
|
-
#
|
469
|
-
# stream.stop
|
470
|
-
#
|
471
|
-
def on_error &block
|
472
|
-
synchronize do
|
473
|
-
@callbacks[:error] << block
|
474
|
-
end
|
475
|
-
end
|
476
|
-
|
477
|
-
# @private returns error object from the stream thread.
|
478
|
-
def error! err
|
479
|
-
synchronize do
|
480
|
-
@callbacks[:error].each { |c| c.call err }
|
481
|
-
end
|
482
|
-
end
|
483
|
-
|
484
|
-
protected
|
485
|
-
|
486
|
-
def background_run
|
487
|
-
response_enum = @service.recognize_stream @request_queue.each_item
|
488
|
-
response_enum.each do |response|
|
489
|
-
begin
|
490
|
-
background_results response
|
491
|
-
background_event_type response.speech_event_type
|
492
|
-
background_error response.error
|
493
|
-
rescue StandardError => e
|
494
|
-
error! Google::Cloud::Error.from_error(e)
|
495
|
-
end
|
496
|
-
end
|
497
|
-
rescue StandardError => e
|
498
|
-
error! Google::Cloud::Error.from_error(e)
|
499
|
-
ensure
|
500
|
-
pass_complete!
|
501
|
-
Thread.pass
|
502
|
-
end
|
503
|
-
|
504
|
-
def background_results response
  # Handle the results (StreamingRecognitionResult)
  return unless response.results && response.results.any?

  # Split into the leading result and ALL remaining ones. Without the splat
  # only the second element would be captured and any later interim results
  # in the same response would be silently dropped.
  final_grpc, *interim_grpcs = *response.results
  unless final_grpc && final_grpc.is_final
    # all results are interim
    final_grpc = nil
    interim_grpcs = response.results
  end

  # convert to Speech object from GRPC object; from_grpc returns nil for a
  # result without alternatives, so those entries are removed.
  interim_results = Array(interim_grpcs).map do |grpc|
    InterimResult.from_grpc grpc
  end.compact

  # callback for interim results received
  pass_interim! interim_results if interim_results.any?
  # callback for final results received, if any
  pass_result! final_grpc if final_grpc
end
|
525
|
-
|
526
|
-
def background_event_type event_type
|
527
|
-
# Handle the event_type by raising events
|
528
|
-
# TODO: do we automatically call stop here?
|
529
|
-
pass_utterance! if event_type == :END_OF_SINGLE_UTTERANCE
|
530
|
-
end
|
531
|
-
|
532
|
-
def background_error error
|
533
|
-
return if error.nil?
|
534
|
-
|
535
|
-
require "grpc/errors"
|
536
|
-
raise GRPC::BadStatus.new(error.code, error.message)
|
537
|
-
end
|
538
|
-
|
539
|
-
# @private
|
540
|
-
class EnumeratorQueue
|
541
|
-
extend Forwardable
|
542
|
-
def_delegators :@q, :push
|
543
|
-
|
544
|
-
# @private
|
545
|
-
def initialize sentinel
|
546
|
-
@q = Queue.new
|
547
|
-
@sentinel = sentinel
|
548
|
-
end
|
549
|
-
|
550
|
-
# @private
|
551
|
-
def each_item
|
552
|
-
return enum_for(:each_item) unless block_given?
|
553
|
-
loop do
|
554
|
-
r = @q.pop
|
555
|
-
break if r.equal? @sentinel
|
556
|
-
raise r if r.is_a? Exception
|
557
|
-
yield r
|
558
|
-
end
|
559
|
-
end
|
560
|
-
end
|
561
|
-
end
|
562
|
-
end
|
563
|
-
end
|
564
|
-
end
|