google-cloud-speech 0.29.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +2 -1
- data/LICENSE +1 -1
- data/README.md +69 -43
- data/lib/google/cloud/speech.rb +94 -252
- data/lib/google/cloud/speech/v1.rb +11 -1
- data/lib/google/cloud/speech/v1/cloud_speech_services_pb.rb +1 -1
- data/lib/google/cloud/speech/{version.rb → v1/credentials.rb} +12 -2
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +2 -2
- data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +92 -0
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/overview.rb +1 -1
- data/lib/google/cloud/speech/v1/helpers.rb +93 -0
- data/lib/google/cloud/speech/v1/speech_client.rb +26 -49
- data/lib/google/cloud/speech/v1/speech_client_config.json +5 -5
- data/lib/google/cloud/speech/v1/stream.rb +614 -0
- data/lib/google/cloud/speech/v1p1beta1.rb +126 -0
- data/lib/google/cloud/speech/v1p1beta1/cloud_speech_pb.rb +175 -0
- data/lib/google/cloud/speech/v1p1beta1/cloud_speech_services_pb.rb +54 -0
- data/lib/google/cloud/speech/v1p1beta1/credentials.rb +32 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +625 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +92 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +124 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +90 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +83 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +73 -0
- data/lib/google/cloud/speech/v1p1beta1/helpers.rb +93 -0
- data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +322 -0
- data/lib/google/cloud/speech/v1p1beta1/speech_client_config.json +41 -0
- data/lib/google/cloud/speech/v1p1beta1/stream.rb +614 -0
- metadata +29 -120
- data/lib/google-cloud-speech.rb +0 -142
- data/lib/google/cloud/speech/audio.rb +0 -330
- data/lib/google/cloud/speech/convert.rb +0 -46
- data/lib/google/cloud/speech/credentials.rb +0 -57
- data/lib/google/cloud/speech/operation.rb +0 -262
- data/lib/google/cloud/speech/project.rb +0 -651
- data/lib/google/cloud/speech/result.rb +0 -240
- data/lib/google/cloud/speech/service.rb +0 -121
- data/lib/google/cloud/speech/stream.rb +0 -564
@@ -1,46 +0,0 @@
|
|
1
|
-
# Copyright 2017 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/protobuf/duration_pb"
|
17
|
-
|
18
|
-
module Google
|
19
|
-
module Cloud
|
20
|
-
module Speech
|
21
|
-
##
|
22
|
-
# @private Helper module for converting Speech values.
|
23
|
-
module Convert
|
24
|
-
module ClassMethods
|
25
|
-
def number_to_duration number
|
26
|
-
return nil if number.nil?
|
27
|
-
|
28
|
-
Google::Protobuf::Duration.new \
|
29
|
-
seconds: number.to_i,
|
30
|
-
nanos: (number.remainder(1) * 1000000000).round
|
31
|
-
end
|
32
|
-
|
33
|
-
def duration_to_number duration
|
34
|
-
return nil if duration.nil?
|
35
|
-
|
36
|
-
return duration.seconds if duration.nanos.zero?
|
37
|
-
|
38
|
-
duration.seconds + (duration.nanos / 1000000000.0)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
extend ClassMethods
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "googleauth"
|
17
|
-
|
18
|
-
module Google
|
19
|
-
module Cloud
|
20
|
-
module Speech
|
21
|
-
##
|
22
|
-
# # Credentials
|
23
|
-
#
|
24
|
-
# Represents the authentication and authorization used to connect to the
|
25
|
-
# Speech API.
|
26
|
-
#
|
27
|
-
# @example
|
28
|
-
# require "google/cloud/speech"
|
29
|
-
#
|
30
|
-
# keyfile = "/path/to/keyfile.json"
|
31
|
-
# creds = Google::Cloud::Speech::Credentials.new keyfile
|
32
|
-
#
|
33
|
-
# speech = Google::Cloud::Speech.new(
|
34
|
-
# project_id: "my-project",
|
35
|
-
# credentials: creds
|
36
|
-
# )
|
37
|
-
#
|
38
|
-
# speech.project_id #=> "my-project"
|
39
|
-
#
|
40
|
-
class Credentials < Google::Auth::Credentials
|
41
|
-
SCOPE = ["https://www.googleapis.com/auth/cloud-platform"].freeze
|
42
|
-
PATH_ENV_VARS = %w[SPEECH_CREDENTIALS
|
43
|
-
SPEECH_KEYFILE
|
44
|
-
GOOGLE_CLOUD_CREDENTIALS
|
45
|
-
GOOGLE_CLOUD_KEYFILE
|
46
|
-
GCLOUD_KEYFILE].freeze
|
47
|
-
JSON_ENV_VARS = %w[SPEECH_CREDENTIALS_JSON
|
48
|
-
SPEECH_KEYFILE_JSON
|
49
|
-
GOOGLE_CLOUD_CREDENTIALS_JSON
|
50
|
-
GOOGLE_CLOUD_KEYFILE_JSON
|
51
|
-
GCLOUD_KEYFILE_JSON].freeze
|
52
|
-
DEFAULT_PATHS = \
|
53
|
-
["~/.config/gcloud/application_default_credentials.json"].freeze
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
@@ -1,262 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/cloud/speech/v1"
|
17
|
-
require "google/cloud/errors"
|
18
|
-
|
19
|
-
module Google
|
20
|
-
module Cloud
|
21
|
-
module Speech
|
22
|
-
##
|
23
|
-
# # Operation
|
24
|
-
#
|
25
|
-
# A resource represents the long-running, asynchronous processing of a
|
26
|
-
# speech-recognition operation. The op can be refreshed to retrieve
|
27
|
-
# recognition results once the audio data has been processed.
|
28
|
-
#
|
29
|
-
# See {Project#process} and {Audio#process}.
|
30
|
-
#
|
31
|
-
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
32
|
-
# Asynchronous Speech API Responses
|
33
|
-
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
|
34
|
-
# Long-running Operation
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# require "google/cloud/speech"
|
38
|
-
#
|
39
|
-
# speech = Google::Cloud::Speech.new
|
40
|
-
#
|
41
|
-
# op = speech.process "path/to/audio.raw",
|
42
|
-
# encoding: :linear16,
|
43
|
-
# language: "en-US",
|
44
|
-
# sample_rate: 16000
|
45
|
-
#
|
46
|
-
# op.done? #=> false
|
47
|
-
# op.reload! # API call
|
48
|
-
# op.done? #=> true
|
49
|
-
# results = op.results
|
50
|
-
#
|
51
|
-
class Operation
|
52
|
-
##
|
53
|
-
# @private The Google::Gax::Operation gRPC object.
|
54
|
-
attr_accessor :grpc
|
55
|
-
|
56
|
-
##
|
57
|
-
# @private Creates a new Job instance.
|
58
|
-
def initialize
|
59
|
-
@grpc = nil
|
60
|
-
end
|
61
|
-
|
62
|
-
##
|
63
|
-
# The unique identifier for the long running operation.
|
64
|
-
#
|
65
|
-
# @return [String] The unique identifier for the long running operation.
|
66
|
-
#
|
67
|
-
# @example
|
68
|
-
# require "google/cloud/speech"
|
69
|
-
#
|
70
|
-
# speech = Google::Cloud::Speech.new
|
71
|
-
#
|
72
|
-
# op = speech.process "path/to/audio.raw",
|
73
|
-
# encoding: :linear16,
|
74
|
-
# language: "en-US",
|
75
|
-
# sample_rate: 16000
|
76
|
-
#
|
77
|
-
# op.id #=> "1234567890"
|
78
|
-
#
|
79
|
-
def id
|
80
|
-
@grpc.name
|
81
|
-
end
|
82
|
-
|
83
|
-
##
|
84
|
-
# Checks if the speech-recognition processing of the audio data is
|
85
|
-
# complete.
|
86
|
-
#
|
87
|
-
# @return [boolean] `true` when complete, `false` otherwise.
|
88
|
-
#
|
89
|
-
# @example
|
90
|
-
# require "google/cloud/speech"
|
91
|
-
#
|
92
|
-
# speech = Google::Cloud::Speech.new
|
93
|
-
#
|
94
|
-
# op = speech.process "path/to/audio.raw",
|
95
|
-
# encoding: :linear16,
|
96
|
-
# language: "en-US",
|
97
|
-
# sample_rate: 16000
|
98
|
-
#
|
99
|
-
# op.done? #=> false
|
100
|
-
#
|
101
|
-
def done?
|
102
|
-
@grpc.done?
|
103
|
-
end
|
104
|
-
|
105
|
-
##
|
106
|
-
# A speech recognition result corresponding to a portion of the audio.
|
107
|
-
#
|
108
|
-
# @return [Array<Result>] The transcribed text of audio recognized. If
|
109
|
-
# the op is not done this will return `nil`.
|
110
|
-
#
|
111
|
-
# @example
|
112
|
-
# require "google/cloud/speech"
|
113
|
-
#
|
114
|
-
# speech = Google::Cloud::Speech.new
|
115
|
-
#
|
116
|
-
# op = speech.process "path/to/audio.raw",
|
117
|
-
# encoding: :linear16,
|
118
|
-
# language: "en-US",
|
119
|
-
# sample_rate: 16000
|
120
|
-
#
|
121
|
-
# op.done? #=> true
|
122
|
-
# op.results? #=> true
|
123
|
-
# results = op.results
|
124
|
-
#
|
125
|
-
def results
|
126
|
-
return nil unless results?
|
127
|
-
@grpc.response.results.map do |result_grpc|
|
128
|
-
Result.from_grpc result_grpc
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
##
|
133
|
-
# Checks if the speech-recognition processing of the audio data is
|
134
|
-
# complete.
|
135
|
-
#
|
136
|
-
# @return [boolean] `true` when complete, `false` otherwise.
|
137
|
-
#
|
138
|
-
# @example
|
139
|
-
# require "google/cloud/speech"
|
140
|
-
#
|
141
|
-
# speech = Google::Cloud::Speech.new
|
142
|
-
#
|
143
|
-
# op = speech.process "path/to/audio.raw",
|
144
|
-
# encoding: :linear16,
|
145
|
-
# language: "en-US",
|
146
|
-
# sample_rate: 16000
|
147
|
-
#
|
148
|
-
# op.done? #=> true
|
149
|
-
# op.results? #=> true
|
150
|
-
# results = op.results
|
151
|
-
#
|
152
|
-
def results?
|
153
|
-
@grpc.response?
|
154
|
-
end
|
155
|
-
|
156
|
-
##
|
157
|
-
# The error information if the speech-recognition processing of the
|
158
|
-
# audio data has returned an error.
|
159
|
-
#
|
160
|
-
# @return [Google::Cloud::Error] The error.
|
161
|
-
#
|
162
|
-
# @example
|
163
|
-
# require "google/cloud/speech"
|
164
|
-
#
|
165
|
-
# speech = Google::Cloud::Speech.new
|
166
|
-
#
|
167
|
-
# op = speech.process "path/to/audio.raw",
|
168
|
-
# encoding: :linear16,
|
169
|
-
# language: "en-US",
|
170
|
-
# sample_rate: 16000
|
171
|
-
#
|
172
|
-
# op.done? #=> true
|
173
|
-
# op.error? #=> true
|
174
|
-
# error = op.error
|
175
|
-
#
|
176
|
-
def error
|
177
|
-
return nil unless error?
|
178
|
-
Google::Cloud::Error.from_error @grpc.error
|
179
|
-
end
|
180
|
-
|
181
|
-
##
|
182
|
-
# Checks if the speech-recognition processing of the audio data has
|
183
|
-
# returned an error.
|
184
|
-
#
|
185
|
-
# @return [boolean] `true` when errored, `false` otherwise.
|
186
|
-
#
|
187
|
-
# @example
|
188
|
-
# require "google/cloud/speech"
|
189
|
-
#
|
190
|
-
# speech = Google::Cloud::Speech.new
|
191
|
-
#
|
192
|
-
# op = speech.process "path/to/audio.raw",
|
193
|
-
# encoding: :linear16,
|
194
|
-
# language: "en-US",
|
195
|
-
# sample_rate: 16000
|
196
|
-
#
|
197
|
-
# op.done? #=> true
|
198
|
-
# op.error? #=> true
|
199
|
-
# error = op.error
|
200
|
-
#
|
201
|
-
def error?
|
202
|
-
@grpc.error?
|
203
|
-
end
|
204
|
-
|
205
|
-
##
|
206
|
-
# Reloads the op with current data from the long-running, asynchronous
|
207
|
-
# processing of a speech-recognition operation.
|
208
|
-
#
|
209
|
-
# @example
|
210
|
-
# require "google/cloud/speech"
|
211
|
-
#
|
212
|
-
# speech = Google::Cloud::Speech.new
|
213
|
-
#
|
214
|
-
# op = speech.process "path/to/audio.raw",
|
215
|
-
# encoding: :linear16,
|
216
|
-
# language: "en-US",
|
217
|
-
# sample_rate: 16000
|
218
|
-
#
|
219
|
-
# op.done? #=> false
|
220
|
-
# op.reload! # API call
|
221
|
-
# op.done? #=> true
|
222
|
-
#
|
223
|
-
def reload!
|
224
|
-
@grpc.reload!
|
225
|
-
self
|
226
|
-
end
|
227
|
-
alias refresh! reload!
|
228
|
-
|
229
|
-
##
|
230
|
-
# Reloads the op until the operation is complete. The delay between
|
231
|
-
# reloads will incrementally increase.
|
232
|
-
#
|
233
|
-
# @example
|
234
|
-
# require "google/cloud/speech"
|
235
|
-
#
|
236
|
-
# speech = Google::Cloud::Speech.new
|
237
|
-
#
|
238
|
-
# op = speech.process "path/to/audio.raw",
|
239
|
-
# encoding: :linear16,
|
240
|
-
# language: "en-US",
|
241
|
-
# sample_rate: 16000
|
242
|
-
#
|
243
|
-
# op.done? #=> false
|
244
|
-
# op.wait_until_done!
|
245
|
-
# op.done? #=> true
|
246
|
-
#
|
247
|
-
def wait_until_done!
|
248
|
-
@grpc.wait_until_done!
|
249
|
-
end
|
250
|
-
|
251
|
-
##
|
252
|
-
# @private New Result::Job from a Google::Gax::Operation
|
253
|
-
# object.
|
254
|
-
def self.from_grpc grpc
|
255
|
-
new.tap do |job|
|
256
|
-
job.instance_variable_set :@grpc, grpc
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
260
|
-
end
|
261
|
-
end
|
262
|
-
end
|
@@ -1,651 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/cloud/errors"
|
17
|
-
require "google/cloud/speech/service"
|
18
|
-
require "google/cloud/speech/audio"
|
19
|
-
require "google/cloud/speech/result"
|
20
|
-
require "google/cloud/speech/operation"
|
21
|
-
require "google/cloud/speech/stream"
|
22
|
-
|
23
|
-
module Google
|
24
|
-
module Cloud
|
25
|
-
module Speech
|
26
|
-
##
|
27
|
-
# # Project
|
28
|
-
#
|
29
|
-
# The Google Cloud Speech API enables developers to convert audio to text
|
30
|
-
# by applying powerful neural network models. The API recognizes over 80
|
31
|
-
# languages and variants, to support your global user base. You can
|
32
|
-
# transcribe the text of users dictating to an application's microphone,
|
33
|
-
# enable command-and-control through voice, or transcribe audio files,
|
34
|
-
# among many other use cases. Recognize audio uploaded in the request, and
|
35
|
-
# integrate with your audio storage on Google Cloud Storage, by using the
|
36
|
-
# same technology Google uses to power its own products.
|
37
|
-
#
|
38
|
-
# See {Google::Cloud#speech}
|
39
|
-
#
|
40
|
-
# @example
|
41
|
-
# require "google/cloud/speech"
|
42
|
-
#
|
43
|
-
# speech = Google::Cloud::Speech.new
|
44
|
-
#
|
45
|
-
# audio = speech.audio "path/to/audio.raw",
|
46
|
-
# encoding: :linear16,
|
47
|
-
# language: "en-US",
|
48
|
-
# sample_rate: 16000
|
49
|
-
# results = audio.recognize
|
50
|
-
#
|
51
|
-
# result = results.first
|
52
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
53
|
-
# result.confidence #=> 0.9826789498329163
|
54
|
-
#
|
55
|
-
class Project
|
56
|
-
##
|
57
|
-
# @private The gRPC Service object.
|
58
|
-
attr_accessor :service
|
59
|
-
|
60
|
-
##
|
61
|
-
# @private Creates a new Speech Project instance.
|
62
|
-
def initialize service
|
63
|
-
@service = service
|
64
|
-
end
|
65
|
-
|
66
|
-
# The Speech project connected to.
|
67
|
-
#
|
68
|
-
# @example
|
69
|
-
# require "google/cloud/speech"
|
70
|
-
#
|
71
|
-
# speech = Google::Cloud::Speech.new(
|
72
|
-
# project_id: "my-project",
|
73
|
-
# credentials: "/path/to/keyfile.json"
|
74
|
-
# )
|
75
|
-
#
|
76
|
-
# speech.project_id #=> "my-project"
|
77
|
-
#
|
78
|
-
def project_id
|
79
|
-
service.project
|
80
|
-
end
|
81
|
-
alias project project_id
|
82
|
-
|
83
|
-
##
|
84
|
-
# Returns a new Audio instance from the given source. No API call is
|
85
|
-
# made.
|
86
|
-
#
|
87
|
-
# @see https://cloud.google.com/speech/docs/basics#audio-encodings
|
88
|
-
# Audio Encodings
|
89
|
-
# @see https://cloud.google.com/speech/docs/basics#sample-rates
|
90
|
-
# Sample Rates
|
91
|
-
# @see https://cloud.google.com/speech/docs/basics#languages
|
92
|
-
# Languages
|
93
|
-
#
|
94
|
-
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
95
|
-
# the path to the audio file to be recognized, or a File or other IO
|
96
|
-
# object of the audio contents, or a Cloud Storage URI of the form
|
97
|
-
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
98
|
-
# Google::Cloud::Storage::File of the text to be annotated.
|
99
|
-
# @param [String, Symbol] encoding Encoding of audio data to be
|
100
|
-
# recognized. Optional.
|
101
|
-
#
|
102
|
-
# Acceptable values are:
|
103
|
-
#
|
104
|
-
# * `linear16` - Uncompressed 16-bit signed little-endian samples.
|
105
|
-
# (LINEAR16)
|
106
|
-
# * `flac` - The [Free Lossless Audio
|
107
|
-
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
108
|
-
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
109
|
-
# are supported. (FLAC)
|
110
|
-
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
111
|
-
# G.711 PCMU/mu-law. (MULAW)
|
112
|
-
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
113
|
-
# be 8000 Hz.) (AMR)
|
114
|
-
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
115
|
-
# be 16000 Hz.) (AMR_WB)
|
116
|
-
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
117
|
-
#
|
118
|
-
# Lossy codecs do not recommend, as they result in a lower-quality
|
119
|
-
# speech transcription.
|
120
|
-
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
121
|
-
#
|
122
|
-
# Lossy codecs do not recommend, as they result in a lower-quality
|
123
|
-
# speech transcription. If you must use a low-bitrate encoder,
|
124
|
-
# OGG_OPUS is preferred.
|
125
|
-
#
|
126
|
-
# @param [String,Symbol] language The language of the supplied audio as
|
127
|
-
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
128
|
-
# "en-US" for English (United States), "en-GB" for English (United
|
129
|
-
# Kingdom), "fr-FR" for French (France). See [Language
|
130
|
-
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
131
|
-
# of the currently supported language codes. Optional.
|
132
|
-
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
133
|
-
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
134
|
-
# For best results, set the sampling rate of the audio source to 16000
|
135
|
-
# Hz. If that's not possible, use the native sample rate of the audio
|
136
|
-
# source (instead of re-sampling). Optional.
|
137
|
-
#
|
138
|
-
# @return [Audio] The audio file to be recognized.
|
139
|
-
#
|
140
|
-
# @example
|
141
|
-
# require "google/cloud/speech"
|
142
|
-
#
|
143
|
-
# speech = Google::Cloud::Speech.new
|
144
|
-
#
|
145
|
-
# audio = speech.audio "path/to/audio.raw",
|
146
|
-
# encoding: :linear16,
|
147
|
-
# language: "en-US",
|
148
|
-
# sample_rate: 16000
|
149
|
-
#
|
150
|
-
# @example With a Google Cloud Storage URI:
|
151
|
-
# require "google/cloud/speech"
|
152
|
-
#
|
153
|
-
# speech = Google::Cloud::Speech.new
|
154
|
-
#
|
155
|
-
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
156
|
-
# encoding: :linear16,
|
157
|
-
# language: "en-US",
|
158
|
-
# sample_rate: 16000
|
159
|
-
#
|
160
|
-
# @example With a Google Cloud Storage File object:
|
161
|
-
# require "google/cloud/storage"
|
162
|
-
#
|
163
|
-
# storage = Google::Cloud::Storage.new
|
164
|
-
#
|
165
|
-
# bucket = storage.bucket "bucket-name"
|
166
|
-
# file = bucket.file "path/to/audio.raw"
|
167
|
-
#
|
168
|
-
# require "google/cloud/speech"
|
169
|
-
#
|
170
|
-
# speech = Google::Cloud::Speech.new
|
171
|
-
#
|
172
|
-
# audio = speech.audio file,
|
173
|
-
# encoding: :linear16,
|
174
|
-
# language: "en-US",
|
175
|
-
# sample_rate: 16000
|
176
|
-
#
|
177
|
-
def audio source, encoding: nil, language: nil, sample_rate: nil
|
178
|
-
audio = if source.is_a? Audio
|
179
|
-
source.dup
|
180
|
-
else
|
181
|
-
Audio.from_source source, self
|
182
|
-
end
|
183
|
-
audio.encoding = encoding unless encoding.nil?
|
184
|
-
audio.language = language unless language.nil?
|
185
|
-
audio.sample_rate = sample_rate unless sample_rate.nil?
|
186
|
-
audio
|
187
|
-
end
|
188
|
-
|
189
|
-
##
|
190
|
-
# Performs synchronous speech recognition. Sends audio data to the
|
191
|
-
# Speech API, which performs recognition on that data, and returns
|
192
|
-
# results only after all audio has been processed. Limited to audio data
|
193
|
-
# of 1 minute or less in duration.
|
194
|
-
#
|
195
|
-
# The Speech API will take roughly the same amount of time to process
|
196
|
-
# audio data sent synchronously as the duration of the supplied audio
|
197
|
-
# data. That is, if you send audio data of 30 seconds in length, expect
|
198
|
-
# the synchronous request to take approximately 30 seconds to return
|
199
|
-
# results.
|
200
|
-
#
|
201
|
-
# @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
|
202
|
-
# Synchronous Speech API Recognition
|
203
|
-
# @see https://cloud.google.com/speech/docs/basics#phrase-hints
|
204
|
-
# Phrase Hints
|
205
|
-
#
|
206
|
-
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
207
|
-
# the path to the audio file to be recognized, or a File or other IO
|
208
|
-
# object of the audio contents, or a Cloud Storage URI of the form
|
209
|
-
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
210
|
-
# Google::Cloud::Storage::File of the text to be annotated.
|
211
|
-
# @param [String, Symbol] encoding Encoding of audio data to be
|
212
|
-
# recognized. Optional.
|
213
|
-
#
|
214
|
-
# Acceptable values are:
|
215
|
-
#
|
216
|
-
# * `linear16` - Uncompressed 16-bit signed little-endian samples.
|
217
|
-
# (LINEAR16)
|
218
|
-
# * `flac` - The [Free Lossless Audio
|
219
|
-
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
220
|
-
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
221
|
-
# are supported. (FLAC)
|
222
|
-
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
223
|
-
# G.711 PCMU/mu-law. (MULAW)
|
224
|
-
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
225
|
-
# be 8000 Hz.) (AMR)
|
226
|
-
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
227
|
-
# be 16000 Hz.) (AMR_WB)
|
228
|
-
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
229
|
-
#
|
230
|
-
# Lossy codecs do not recommend, as they result in a lower-quality
|
231
|
-
# speech transcription.
|
232
|
-
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
233
|
-
#
|
234
|
-
# Lossy codecs do not recommend, as they result in a lower-quality
|
235
|
-
# speech transcription. If you must use a low-bitrate encoder,
|
236
|
-
# OGG_OPUS is preferred.
|
237
|
-
#
|
238
|
-
# @param [String,Symbol] language The language of the supplied audio as
|
239
|
-
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
240
|
-
# "en-US" for English (United States), "en-GB" for English (United
|
241
|
-
# Kingdom), "fr-FR" for French (France). See [Language
|
242
|
-
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
243
|
-
# of the currently supported language codes. Optional.
|
244
|
-
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
245
|
-
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
246
|
-
# For best results, set the sampling rate of the audio source to 16000
|
247
|
-
# Hz. If that's not possible, use the native sample rate of the audio
|
248
|
-
# source (instead of re-sampling). Optional.
|
249
|
-
# @param [String] max_alternatives The Maximum number of recognition
|
250
|
-
# hypotheses to be returned. Default is 1. The service may return
|
251
|
-
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
252
|
-
# @param [Boolean] profanity_filter When `true`, the service will
|
253
|
-
# attempt to filter out profanities, replacing all but the initial
|
254
|
-
# character in each filtered word with asterisks, e.g. "f***". Default
|
255
|
-
# is `false`.
|
256
|
-
# @param [Array<String>] phrases A list of strings containing words and
|
257
|
-
# phrases "hints" so that the speech recognition is more likely to
|
258
|
-
# recognize them. See [usage
|
259
|
-
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
260
|
-
# @param [Boolean] words When `true`, return a list of words with
|
261
|
-
# additional information about each word. Currently, the only
|
262
|
-
# additional information provided is the the start and end time
|
263
|
-
# offsets. See {Result#words}. Default is `false`.
|
264
|
-
#
|
265
|
-
# @return [Array<Result>] The transcribed text of audio recognized.
|
266
|
-
#
|
267
|
-
# @example
|
268
|
-
# require "google/cloud/speech"
|
269
|
-
#
|
270
|
-
# speech = Google::Cloud::Speech.new
|
271
|
-
#
|
272
|
-
# results = speech.recognize "path/to/audio.raw",
|
273
|
-
# encoding: :linear16,
|
274
|
-
# language: "en-US",
|
275
|
-
# sample_rate: 16000
|
276
|
-
#
|
277
|
-
# @example With a Google Cloud Storage URI:
|
278
|
-
# require "google/cloud/speech"
|
279
|
-
#
|
280
|
-
# speech = Google::Cloud::Speech.new
|
281
|
-
#
|
282
|
-
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
283
|
-
# encoding: :linear16,
|
284
|
-
# language: "en-US",
|
285
|
-
# sample_rate: 16000
|
286
|
-
#
|
287
|
-
# @example With a Google Cloud Storage File object:
|
288
|
-
# require "google/cloud/storage"
|
289
|
-
#
|
290
|
-
# storage = Google::Cloud::Storage.new
|
291
|
-
#
|
292
|
-
# bucket = storage.bucket "bucket-name"
|
293
|
-
# file = bucket.file "path/to/audio.raw"
|
294
|
-
#
|
295
|
-
# require "google/cloud/speech"
|
296
|
-
#
|
297
|
-
# speech = Google::Cloud::Speech.new
|
298
|
-
#
|
299
|
-
# results = speech.recognize file,
|
300
|
-
# encoding: :linear16,
|
301
|
-
# language: "en-US",
|
302
|
-
# sample_rate: 16000,
|
303
|
-
# max_alternatives: 10
|
304
|
-
#
|
305
|
-
def recognize source, encoding: nil, language: nil, sample_rate: nil,
|
306
|
-
max_alternatives: nil, profanity_filter: nil,
|
307
|
-
phrases: nil, words: nil
|
308
|
-
ensure_service!
|
309
|
-
|
310
|
-
audio_obj = audio source, encoding: encoding, language: language,
|
311
|
-
sample_rate: sample_rate
|
312
|
-
|
313
|
-
config = audio_config(
|
314
|
-
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
315
|
-
language: audio_obj.language, max_alternatives: max_alternatives,
|
316
|
-
profanity_filter: profanity_filter, phrases: phrases,
|
317
|
-
words: words
|
318
|
-
)
|
319
|
-
|
320
|
-
grpc = service.recognize_sync audio_obj.to_grpc, config
|
321
|
-
grpc.results.map do |result_grpc|
|
322
|
-
Result.from_grpc result_grpc
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
##
|
327
|
-
# Performs asynchronous speech recognition. Requests are processed
|
328
|
-
# asynchronously, meaning a Operation is returned once the audio data
|
329
|
-
# has been sent, and can be refreshed to retrieve recognition results
|
330
|
-
# once the audio data has been processed.
|
331
|
-
#
|
332
|
-
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
333
|
-
# Asynchronous Speech API Responses
|
334
|
-
#
|
335
|
-
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
336
|
-
# the path to the audio file to be recognized, or a File or other IO
|
337
|
-
# object of the audio contents, or a Cloud Storage URI of the form
|
338
|
-
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
339
|
-
# Google::Cloud::Storage::File of the text to be annotated.
|
340
|
-
# @param [String, Symbol] encoding Encoding of audio data to be
|
341
|
-
# recognized. Optional.
|
342
|
-
#
|
343
|
-
# Acceptable values are:
|
344
|
-
#
|
345
|
-
# * `linear16` - Uncompressed 16-bit signed little-endian samples.
|
346
|
-
# (LINEAR16)
|
347
|
-
# * `flac` - The [Free Lossless Audio
|
348
|
-
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
349
|
-
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
350
|
-
# are supported. (FLAC)
|
351
|
-
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
352
|
-
# G.711 PCMU/mu-law. (MULAW)
|
353
|
-
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
354
|
-
# be 8000 Hz.) (AMR)
|
355
|
-
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
356
|
-
# be 16000 Hz.) (AMR_WB)
|
357
|
-
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
358
|
-
#
|
359
|
-
# Lossy codecs do not recommend, as they result in a lower-quality
|
360
|
-
# speech transcription.
|
361
|
-
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
362
|
-
#
|
363
|
-
# Lossy codecs do not recommend, as they result in a lower-quality
|
364
|
-
# speech transcription. If you must use a low-bitrate encoder,
|
365
|
-
# OGG_OPUS is preferred.
|
366
|
-
#
|
367
|
-
# @param [String,Symbol] language The language of the supplied audio as
|
368
|
-
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
369
|
-
# "en-US" for English (United States), "en-GB" for English (United
|
370
|
-
# Kingdom), "fr-FR" for French (France). See [Language
|
371
|
-
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
372
|
-
# of the currently supported language codes. Optional.
|
373
|
-
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
374
|
-
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
375
|
-
# For best results, set the sampling rate of the audio source to 16000
|
376
|
-
# Hz. If that's not possible, use the native sample rate of the audio
|
377
|
-
# source (instead of re-sampling). Optional.
|
378
|
-
# @param [String] max_alternatives The Maximum number of recognition
|
379
|
-
# hypotheses to be returned. Default is 1. The service may return
|
380
|
-
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
381
|
-
# @param [Boolean] profanity_filter When `true`, the service will
|
382
|
-
# attempt to filter out profanities, replacing all but the initial
|
383
|
-
# character in each filtered word with asterisks, e.g. "f***". Default
|
384
|
-
# is `false`.
|
385
|
-
# @param [Array<String>] phrases A list of strings containing words and
|
386
|
-
# phrases "hints" so that the speech recognition is more likely to
|
387
|
-
# recognize them. See [usage
|
388
|
-
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
389
|
-
# @param [Boolean] words When `true`, return a list of words with
|
390
|
-
# additional information about each word. Currently, the only
|
391
|
-
# additional information provided is the the start and end time
|
392
|
-
# offsets. See {Result#words}. Default is `false`.
|
393
|
-
#
|
394
|
-
# @return [Operation] A resource represents the long-running,
|
395
|
-
# asynchronous processing of a speech-recognition operation.
|
396
|
-
#
|
397
|
-
# @example
|
398
|
-
# require "google/cloud/speech"
|
399
|
-
#
|
400
|
-
# speech = Google::Cloud::Speech.new
|
401
|
-
#
|
402
|
-
# op = speech.process "path/to/audio.raw",
|
403
|
-
# encoding: :linear16,
|
404
|
-
# language: "en-US",
|
405
|
-
# sample_rate: 16000
|
406
|
-
#
|
407
|
-
# op.done? #=> false
|
408
|
-
# op.reload!
|
409
|
-
#
|
410
|
-
# @example With a Google Cloud Storage URI:
|
411
|
-
# require "google/cloud/speech"
|
412
|
-
#
|
413
|
-
# speech = Google::Cloud::Speech.new
|
414
|
-
#
|
415
|
-
# op = speech.process "gs://bucket-name/path/to/audio.raw",
|
416
|
-
# encoding: :linear16,
|
417
|
-
# language: "en-US",
|
418
|
-
# sample_rate: 16000
|
419
|
-
#
|
420
|
-
# op.done? #=> false
|
421
|
-
# op.reload!
|
422
|
-
#
|
423
|
-
# @example With a Google Cloud Storage File object:
|
424
|
-
# require "google/cloud/storage"
|
425
|
-
#
|
426
|
-
# storage = Google::Cloud::Storage.new
|
427
|
-
#
|
428
|
-
# bucket = storage.bucket "bucket-name"
|
429
|
-
# file = bucket.file "path/to/audio.raw"
|
430
|
-
#
|
431
|
-
# require "google/cloud/speech"
|
432
|
-
#
|
433
|
-
# speech = Google::Cloud::Speech.new
|
434
|
-
#
|
435
|
-
# op = speech.process file,
|
436
|
-
# encoding: :linear16,
|
437
|
-
# language: "en-US",
|
438
|
-
# sample_rate: 16000,
|
439
|
-
# max_alternatives: 10
|
440
|
-
#
|
441
|
-
# op.done? #=> false
|
442
|
-
# op.reload!
|
443
|
-
#
|
444
|
-
def process source, encoding: nil, sample_rate: nil, language: nil,
            max_alternatives: nil, profanity_filter: nil, phrases: nil,
            words: nil
  ensure_service!

  # Normalize the source (path, URI, or File object) into an audio
  # object that carries the effective encoding/language/sample rate.
  input = audio source, encoding: encoding, language: language,
                        sample_rate: sample_rate

  # Build the recognition configuration from the audio's settings plus
  # the caller-supplied options.
  recognition_config = audio_config(
    encoding: input.encoding, sample_rate: input.sample_rate,
    language: input.language, max_alternatives: max_alternatives,
    profanity_filter: profanity_filter, phrases: phrases,
    words: words
  )

  # Kick off the long-running recognize call and wrap the raw gRPC
  # response in an Operation for polling.
  grpc_op = service.recognize_async input.to_grpc, recognition_config
  Operation.from_grpc grpc_op
end
alias long_running_recognize process
alias recognize_job process
|
464
|
-
|
465
|
-
##
# Creates a Stream object to perform bidirectional streaming
# speech-recognition: receive results while sending audio.
#
# @see https://cloud.google.com/speech/docs/basics#streaming-recognition
#   Streaming Speech API Recognition Requests
#
# @param [String, Symbol] encoding Encoding of audio data to be
#   recognized. Optional.
#
#   Acceptable values are:
#
#   * `linear16` - Uncompressed 16-bit signed little-endian samples.
#     (LINEAR16)
#   * `flac` - The [Free Lossless Audio
#     Codec](http://flac.sourceforge.net/documentation.html) encoding.
#     Only 16-bit samples are supported. Not all fields in STREAMINFO
#     are supported. (FLAC)
#   * `mulaw` - 8-bit samples that compand 14-bit audio samples using
#     G.711 PCMU/mu-law. (MULAW)
#   * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
#     be 8000 Hz.) (AMR)
#   * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
#     be 16000 Hz.) (AMR_WB)
#   * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
#   * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
#
#   Lossy codecs are not recommended, as they result in a lower-quality
#   speech transcription. If you must use a low-bitrate encoder,
#   OGG_OPUS is preferred.
#
# @param [String,Symbol] language The language of the supplied audio as
#   a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
#   "en-US" for English (United States), "en-GB" for English (United
#   Kingdom), "fr-FR" for French (France). See [Language
#   Support](https://cloud.google.com/speech/docs/languages) for a list
#   of the currently supported language codes. Optional.
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
#   to be recognized. Valid values are: 8000-48000. 16000 is optimal.
#   For best results, set the sampling rate of the audio source to
#   16000 Hz. If that's not possible, use the native sample rate of the
#   audio source (instead of re-sampling). Optional.
# @param [String] max_alternatives The maximum number of recognition
#   hypotheses to be returned. The service may return fewer. Valid
#   values are 0-30. Defaults to 1. Optional.
# @param [Boolean] profanity_filter When `true`, the service will
#   attempt to filter out profanities, replacing all but the initial
#   character in each filtered word with asterisks, e.g. "f***".
#   Default is `false`.
# @param [Array<String>] phrases A list of strings containing words and
#   phrases "hints" so that the speech recognition is more likely to
#   recognize them. See [usage
#   limits](https://cloud.google.com/speech/limits#content). Optional.
# @param [Boolean] words When `true`, return a list of words with
#   additional information about each word. Currently, the only
#   additional information provided is the start and end time
#   offsets. See {Result#words}. Default is `false`.
# @param [Boolean] utterance When `true`, the service will perform
#   continuous recognition (continuing to process audio even if the
#   user pauses speaking) until the client closes the output stream
#   (gRPC API) or when the maximum time limit has been reached. Default
#   is `false`.
# @param [Boolean] interim When `true`, interim results (tentative
#   hypotheses) may be returned as they become available. Default is
#   `false`.
#
# @return [Stream] A resource that represents the streaming requests
#   and responses.
#
# @example
#   require "google/cloud/speech"
#
#   speech = Google::Cloud::Speech.new
#
#   stream = speech.stream encoding: :linear16,
#                          language: "en-US",
#                          sample_rate: 16000
#
#   # Stream 5 seconds of audio from the microphone
#   # Actual implementation of microphone input varies by platform
#   5.times do
#     stream.send MicrophoneInput.read(32000)
#   end
#
#   stream.stop
#   stream.wait_until_complete!
#
#   results = stream.results
#   result = results.first
#   result.transcript #=> "how old is the Brooklyn Bridge"
#   result.confidence #=> 0.9826789498329163
#
def stream encoding: nil, language: nil, sample_rate: nil,
           max_alternatives: nil, profanity_filter: nil, phrases: nil,
           words: nil, utterance: nil, interim: nil
  ensure_service!

  # NOTE: audio_config already normalizes `encoding` through
  # convert_encoding, so the raw value is passed straight through here.
  # (Previously it was converted twice; convert_encoding is idempotent,
  # so removing the extra call does not change behavior.)
  streaming_attrs = {
    config: audio_config(encoding: encoding,
                         language: language,
                         sample_rate: sample_rate,
                         max_alternatives: max_alternatives,
                         profanity_filter: profanity_filter,
                         phrases: phrases, words: words),
    single_utterance: utterance,
    interim_results: interim
  }.delete_if { |_, v| v.nil? }

  grpc_req = V1::StreamingRecognizeRequest.new(
    streaming_config: V1::StreamingRecognitionConfig.new(streaming_attrs)
  )

  Stream.new service, grpc_req
end
alias stream_recognize stream
|
583
|
-
|
584
|
-
##
# Retrieves an existing long-running, asynchronous speech-recognition
# operation by its unique identifier, so its status and (eventual)
# results can be inspected. The returned operation can be refreshed to
# retrieve recognition results once the audio data has been processed.
#
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
#   Long-running Operation
#
# @param [String] id The unique identifier for the long running
#   operation. Required.
#
# @return [Operation] A resource representing the long-running,
#   asynchronous processing of a speech-recognition operation.
#
# @example
#   require "google/cloud/speech"
#
#   speech = Google::Cloud::Speech.new
#
#   op = speech.operation "1234567890"
#
#   op.done? #=> false
#   op.reload!
#
def operation id
  ensure_service!

  # Fetch the raw gRPC operation and wrap it for the caller.
  Operation.from_grpc service.get_op(id)
end
|
615
|
-
|
616
|
-
protected
|
617
|
-
|
618
|
-
##
# @private Builds a V1::RecognitionConfig from the given settings,
# omitting every attribute that was not supplied so the proto's own
# defaults apply.
def audio_config encoding: nil, language: nil, sample_rate: nil,
                 max_alternatives: nil, profanity_filter: nil,
                 phrases: nil, words: nil
  # Phrase hints, when given, are wrapped in a single SpeechContext.
  speech_contexts = phrases ? [V1::SpeechContext.new(phrases: phrases)] : nil
  attrs = {
    encoding: convert_encoding(encoding),
    language_code: language.nil? ? nil : String(language),
    sample_rate_hertz: sample_rate,
    max_alternatives: max_alternatives,
    profanity_filter: profanity_filter,
    speech_contexts: speech_contexts,
    enable_word_time_offsets: words
  }
  V1::RecognitionConfig.new attrs.delete_if { |_, v| v.nil? }
end
|
634
|
-
|
635
|
-
##
# @private Maps a friendly encoding symbol (e.g. `:linear16`) to the
# corresponding gRPC enum name (e.g. `:LINEAR16`). Values with no
# mapping — including ones already in enum form — pass through
# unchanged.
def convert_encoding encoding
  friendly_to_grpc = {
    linear: :LINEAR16,
    linear16: :LINEAR16,
    flac: :FLAC,
    mulaw: :MULAW,
    amr: :AMR,
    amr_wb: :AMR_WB,
    ogg_opus: :OGG_OPUS,
    speex: :SPEEX_WITH_HEADER_BYTE
  }
  friendly_to_grpc.fetch encoding, encoding
end
|
641
|
-
|
642
|
-
##
# @private Raise an error unless an active connection to the service is
# available.
def ensure_service!
  return if service
  raise "Must have active connection to service"
end
|
648
|
-
end
|
649
|
-
end
|
650
|
-
end
|
651
|
-
end
|