google-cloud-speech 0.29.0 → 0.30.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +2 -1
- data/LICENSE +1 -1
- data/README.md +69 -43
- data/lib/google/cloud/speech.rb +94 -252
- data/lib/google/cloud/speech/v1.rb +11 -1
- data/lib/google/cloud/speech/v1/cloud_speech_services_pb.rb +1 -1
- data/lib/google/cloud/speech/{version.rb → v1/credentials.rb} +12 -2
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +2 -2
- data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +92 -0
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +1 -1
- data/lib/google/cloud/speech/v1/doc/overview.rb +1 -1
- data/lib/google/cloud/speech/v1/helpers.rb +93 -0
- data/lib/google/cloud/speech/v1/speech_client.rb +26 -49
- data/lib/google/cloud/speech/v1/speech_client_config.json +5 -5
- data/lib/google/cloud/speech/v1/stream.rb +614 -0
- data/lib/google/cloud/speech/v1p1beta1.rb +126 -0
- data/lib/google/cloud/speech/v1p1beta1/cloud_speech_pb.rb +175 -0
- data/lib/google/cloud/speech/v1p1beta1/cloud_speech_services_pb.rb +54 -0
- data/lib/google/cloud/speech/v1p1beta1/credentials.rb +32 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +625 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +92 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +124 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +90 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +83 -0
- data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +73 -0
- data/lib/google/cloud/speech/v1p1beta1/helpers.rb +93 -0
- data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +322 -0
- data/lib/google/cloud/speech/v1p1beta1/speech_client_config.json +41 -0
- data/lib/google/cloud/speech/v1p1beta1/stream.rb +614 -0
- metadata +29 -120
- data/lib/google-cloud-speech.rb +0 -142
- data/lib/google/cloud/speech/audio.rb +0 -330
- data/lib/google/cloud/speech/convert.rb +0 -46
- data/lib/google/cloud/speech/credentials.rb +0 -57
- data/lib/google/cloud/speech/operation.rb +0 -262
- data/lib/google/cloud/speech/project.rb +0 -651
- data/lib/google/cloud/speech/result.rb +0 -240
- data/lib/google/cloud/speech/service.rb +0 -121
- data/lib/google/cloud/speech/stream.rb +0 -564
@@ -1,46 +0,0 @@
|
|
1
|
-
# Copyright 2017 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/protobuf/duration_pb"
|
17
|
-
|
18
|
-
module Google
|
19
|
-
module Cloud
|
20
|
-
module Speech
|
21
|
-
##
|
22
|
-
# @private Helper module for converting Speech values.
|
23
|
-
module Convert
|
24
|
-
module ClassMethods
|
25
|
-
def number_to_duration number
|
26
|
-
return nil if number.nil?
|
27
|
-
|
28
|
-
Google::Protobuf::Duration.new \
|
29
|
-
seconds: number.to_i,
|
30
|
-
nanos: (number.remainder(1) * 1000000000).round
|
31
|
-
end
|
32
|
-
|
33
|
-
def duration_to_number duration
|
34
|
-
return nil if duration.nil?
|
35
|
-
|
36
|
-
return duration.seconds if duration.nanos.zero?
|
37
|
-
|
38
|
-
duration.seconds + (duration.nanos / 1000000000.0)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
extend ClassMethods
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "googleauth"
|
17
|
-
|
18
|
-
module Google
|
19
|
-
module Cloud
|
20
|
-
module Speech
|
21
|
-
##
|
22
|
-
# # Credentials
|
23
|
-
#
|
24
|
-
# Represents the authentication and authorization used to connect to the
|
25
|
-
# Speech API.
|
26
|
-
#
|
27
|
-
# @example
|
28
|
-
# require "google/cloud/speech"
|
29
|
-
#
|
30
|
-
# keyfile = "/path/to/keyfile.json"
|
31
|
-
# creds = Google::Cloud::Speech::Credentials.new keyfile
|
32
|
-
#
|
33
|
-
# speech = Google::Cloud::Speech.new(
|
34
|
-
# project_id: "my-project",
|
35
|
-
# credentials: creds
|
36
|
-
# )
|
37
|
-
#
|
38
|
-
# speech.project_id #=> "my-project"
|
39
|
-
#
|
40
|
-
class Credentials < Google::Auth::Credentials
|
41
|
-
SCOPE = ["https://www.googleapis.com/auth/cloud-platform"].freeze
|
42
|
-
PATH_ENV_VARS = %w[SPEECH_CREDENTIALS
|
43
|
-
SPEECH_KEYFILE
|
44
|
-
GOOGLE_CLOUD_CREDENTIALS
|
45
|
-
GOOGLE_CLOUD_KEYFILE
|
46
|
-
GCLOUD_KEYFILE].freeze
|
47
|
-
JSON_ENV_VARS = %w[SPEECH_CREDENTIALS_JSON
|
48
|
-
SPEECH_KEYFILE_JSON
|
49
|
-
GOOGLE_CLOUD_CREDENTIALS_JSON
|
50
|
-
GOOGLE_CLOUD_KEYFILE_JSON
|
51
|
-
GCLOUD_KEYFILE_JSON].freeze
|
52
|
-
DEFAULT_PATHS = \
|
53
|
-
["~/.config/gcloud/application_default_credentials.json"].freeze
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
@@ -1,262 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/cloud/speech/v1"
|
17
|
-
require "google/cloud/errors"
|
18
|
-
|
19
|
-
module Google
|
20
|
-
module Cloud
|
21
|
-
module Speech
|
22
|
-
##
|
23
|
-
# # Operation
|
24
|
-
#
|
25
|
-
# A resource that represents the long-running, asynchronous processing of a
|
26
|
-
# speech-recognition operation. The op can be refreshed to retrieve
|
27
|
-
# recognition results once the audio data has been processed.
|
28
|
-
#
|
29
|
-
# See {Project#process} and {Audio#process}.
|
30
|
-
#
|
31
|
-
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
32
|
-
# Asynchronous Speech API Responses
|
33
|
-
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
|
34
|
-
# Long-running Operation
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# require "google/cloud/speech"
|
38
|
-
#
|
39
|
-
# speech = Google::Cloud::Speech.new
|
40
|
-
#
|
41
|
-
# op = speech.process "path/to/audio.raw",
|
42
|
-
# encoding: :linear16,
|
43
|
-
# language: "en-US",
|
44
|
-
# sample_rate: 16000
|
45
|
-
#
|
46
|
-
# op.done? #=> false
|
47
|
-
# op.reload! # API call
|
48
|
-
# op.done? #=> true
|
49
|
-
# results = op.results
|
50
|
-
#
|
51
|
-
class Operation
|
52
|
-
##
|
53
|
-
# @private The Google::Gax::Operation gRPC object.
|
54
|
-
attr_accessor :grpc
|
55
|
-
|
56
|
-
##
|
57
|
-
# @private Creates a new Operation instance.
|
58
|
-
def initialize
|
59
|
-
@grpc = nil
|
60
|
-
end
|
61
|
-
|
62
|
-
##
|
63
|
-
# The unique identifier for the long running operation.
|
64
|
-
#
|
65
|
-
# @return [String] The unique identifier for the long running operation.
|
66
|
-
#
|
67
|
-
# @example
|
68
|
-
# require "google/cloud/speech"
|
69
|
-
#
|
70
|
-
# speech = Google::Cloud::Speech.new
|
71
|
-
#
|
72
|
-
# op = speech.process "path/to/audio.raw",
|
73
|
-
# encoding: :linear16,
|
74
|
-
# language: "en-US",
|
75
|
-
# sample_rate: 16000
|
76
|
-
#
|
77
|
-
# op.id #=> "1234567890"
|
78
|
-
#
|
79
|
-
def id
|
80
|
-
@grpc.name
|
81
|
-
end
|
82
|
-
|
83
|
-
##
|
84
|
-
# Checks if the speech-recognition processing of the audio data is
|
85
|
-
# complete.
|
86
|
-
#
|
87
|
-
# @return [boolean] `true` when complete, `false` otherwise.
|
88
|
-
#
|
89
|
-
# @example
|
90
|
-
# require "google/cloud/speech"
|
91
|
-
#
|
92
|
-
# speech = Google::Cloud::Speech.new
|
93
|
-
#
|
94
|
-
# op = speech.process "path/to/audio.raw",
|
95
|
-
# encoding: :linear16,
|
96
|
-
# language: "en-US",
|
97
|
-
# sample_rate: 16000
|
98
|
-
#
|
99
|
-
# op.done? #=> false
|
100
|
-
#
|
101
|
-
def done?
|
102
|
-
@grpc.done?
|
103
|
-
end
|
104
|
-
|
105
|
-
##
|
106
|
-
# A speech recognition result corresponding to a portion of the audio.
|
107
|
-
#
|
108
|
-
# @return [Array<Result>] The transcribed text of audio recognized. If
|
109
|
-
# the op is not done this will return `nil`.
|
110
|
-
#
|
111
|
-
# @example
|
112
|
-
# require "google/cloud/speech"
|
113
|
-
#
|
114
|
-
# speech = Google::Cloud::Speech.new
|
115
|
-
#
|
116
|
-
# op = speech.process "path/to/audio.raw",
|
117
|
-
# encoding: :linear16,
|
118
|
-
# language: "en-US",
|
119
|
-
# sample_rate: 16000
|
120
|
-
#
|
121
|
-
# op.done? #=> true
|
122
|
-
# op.results? #=> true
|
123
|
-
# results = op.results
|
124
|
-
#
|
125
|
-
def results
|
126
|
-
return nil unless results?
|
127
|
-
@grpc.response.results.map do |result_grpc|
|
128
|
-
Result.from_grpc result_grpc
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
##
|
133
|
-
# Checks if the speech-recognition processing of the audio data has
|
134
|
-
# returned a response containing results.
|
135
|
-
#
|
136
|
-
# @return [boolean] `true` when a response is available, `false` otherwise.
|
137
|
-
#
|
138
|
-
# @example
|
139
|
-
# require "google/cloud/speech"
|
140
|
-
#
|
141
|
-
# speech = Google::Cloud::Speech.new
|
142
|
-
#
|
143
|
-
# op = speech.process "path/to/audio.raw",
|
144
|
-
# encoding: :linear16,
|
145
|
-
# language: "en-US",
|
146
|
-
# sample_rate: 16000
|
147
|
-
#
|
148
|
-
# op.done? #=> true
|
149
|
-
# op.results? #=> true
|
150
|
-
# results = op.results
|
151
|
-
#
|
152
|
-
def results?
|
153
|
-
@grpc.response?
|
154
|
-
end
|
155
|
-
|
156
|
-
##
|
157
|
-
# The error information if the speech-recognition processing of the
|
158
|
-
# audio data has returned an error.
|
159
|
-
#
|
160
|
-
# @return [Google::Cloud::Error] The error.
|
161
|
-
#
|
162
|
-
# @example
|
163
|
-
# require "google/cloud/speech"
|
164
|
-
#
|
165
|
-
# speech = Google::Cloud::Speech.new
|
166
|
-
#
|
167
|
-
# op = speech.process "path/to/audio.raw",
|
168
|
-
# encoding: :linear16,
|
169
|
-
# language: "en-US",
|
170
|
-
# sample_rate: 16000
|
171
|
-
#
|
172
|
-
# op.done? #=> true
|
173
|
-
# op.error? #=> true
|
174
|
-
# error = op.error
|
175
|
-
#
|
176
|
-
def error
|
177
|
-
return nil unless error?
|
178
|
-
Google::Cloud::Error.from_error @grpc.error
|
179
|
-
end
|
180
|
-
|
181
|
-
##
|
182
|
-
# Checks if the speech-recognition processing of the audio data has
|
183
|
-
# returned an error.
|
184
|
-
#
|
185
|
-
# @return [boolean] `true` when errored, `false` otherwise.
|
186
|
-
#
|
187
|
-
# @example
|
188
|
-
# require "google/cloud/speech"
|
189
|
-
#
|
190
|
-
# speech = Google::Cloud::Speech.new
|
191
|
-
#
|
192
|
-
# op = speech.process "path/to/audio.raw",
|
193
|
-
# encoding: :linear16,
|
194
|
-
# language: "en-US",
|
195
|
-
# sample_rate: 16000
|
196
|
-
#
|
197
|
-
# op.done? #=> true
|
198
|
-
# op.error? #=> true
|
199
|
-
# error = op.error
|
200
|
-
#
|
201
|
-
def error?
|
202
|
-
@grpc.error?
|
203
|
-
end
|
204
|
-
|
205
|
-
##
|
206
|
-
# Reloads the op with current data from the long-running, asynchronous
|
207
|
-
# processing of a speech-recognition operation.
|
208
|
-
#
|
209
|
-
# @example
|
210
|
-
# require "google/cloud/speech"
|
211
|
-
#
|
212
|
-
# speech = Google::Cloud::Speech.new
|
213
|
-
#
|
214
|
-
# op = speech.process "path/to/audio.raw",
|
215
|
-
# encoding: :linear16,
|
216
|
-
# language: "en-US",
|
217
|
-
# sample_rate: 16000
|
218
|
-
#
|
219
|
-
# op.done? #=> false
|
220
|
-
# op.reload! # API call
|
221
|
-
# op.done? #=> true
|
222
|
-
#
|
223
|
-
def reload!
|
224
|
-
@grpc.reload!
|
225
|
-
self
|
226
|
-
end
|
227
|
-
alias refresh! reload!
|
228
|
-
|
229
|
-
##
|
230
|
-
# Reloads the op until the operation is complete. The delay between
|
231
|
-
# reloads will incrementally increase.
|
232
|
-
#
|
233
|
-
# @example
|
234
|
-
# require "google/cloud/speech"
|
235
|
-
#
|
236
|
-
# speech = Google::Cloud::Speech.new
|
237
|
-
#
|
238
|
-
# op = speech.process "path/to/audio.raw",
|
239
|
-
# encoding: :linear16,
|
240
|
-
# language: "en-US",
|
241
|
-
# sample_rate: 16000
|
242
|
-
#
|
243
|
-
# op.done? #=> false
|
244
|
-
# op.wait_until_done!
|
245
|
-
# op.done? #=> true
|
246
|
-
#
|
247
|
-
def wait_until_done!
|
248
|
-
@grpc.wait_until_done!
|
249
|
-
end
|
250
|
-
|
251
|
-
##
|
252
|
-
# @private New Operation from a Google::Gax::Operation
|
253
|
-
# object.
|
254
|
-
def self.from_grpc grpc
|
255
|
-
new.tap do |job|
|
256
|
-
job.instance_variable_set :@grpc, grpc
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
260
|
-
end
|
261
|
-
end
|
262
|
-
end
|
@@ -1,651 +0,0 @@
|
|
1
|
-
# Copyright 2016 Google LLC
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
|
16
|
-
require "google/cloud/errors"
|
17
|
-
require "google/cloud/speech/service"
|
18
|
-
require "google/cloud/speech/audio"
|
19
|
-
require "google/cloud/speech/result"
|
20
|
-
require "google/cloud/speech/operation"
|
21
|
-
require "google/cloud/speech/stream"
|
22
|
-
|
23
|
-
module Google
|
24
|
-
module Cloud
|
25
|
-
module Speech
|
26
|
-
##
|
27
|
-
# # Project
|
28
|
-
#
|
29
|
-
# The Google Cloud Speech API enables developers to convert audio to text
|
30
|
-
# by applying powerful neural network models. The API recognizes over 80
|
31
|
-
# languages and variants, to support your global user base. You can
|
32
|
-
# transcribe the text of users dictating to an application's microphone,
|
33
|
-
# enable command-and-control through voice, or transcribe audio files,
|
34
|
-
# among many other use cases. Recognize audio uploaded in the request, and
|
35
|
-
# integrate with your audio storage on Google Cloud Storage, by using the
|
36
|
-
# same technology Google uses to power its own products.
|
37
|
-
#
|
38
|
-
# See {Google::Cloud#speech}
|
39
|
-
#
|
40
|
-
# @example
|
41
|
-
# require "google/cloud/speech"
|
42
|
-
#
|
43
|
-
# speech = Google::Cloud::Speech.new
|
44
|
-
#
|
45
|
-
# audio = speech.audio "path/to/audio.raw",
|
46
|
-
# encoding: :linear16,
|
47
|
-
# language: "en-US",
|
48
|
-
# sample_rate: 16000
|
49
|
-
# results = audio.recognize
|
50
|
-
#
|
51
|
-
# result = results.first
|
52
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
53
|
-
# result.confidence #=> 0.9826789498329163
|
54
|
-
#
|
55
|
-
class Project
|
56
|
-
##
|
57
|
-
# @private The gRPC Service object.
|
58
|
-
attr_accessor :service
|
59
|
-
|
60
|
-
##
|
61
|
-
# @private Creates a new Speech Project instance.
|
62
|
-
def initialize service
|
63
|
-
@service = service
|
64
|
-
end
|
65
|
-
|
66
|
-
# The Speech project connected to.
|
67
|
-
#
|
68
|
-
# @example
|
69
|
-
# require "google/cloud/speech"
|
70
|
-
#
|
71
|
-
# speech = Google::Cloud::Speech.new(
|
72
|
-
# project_id: "my-project",
|
73
|
-
# credentials: "/path/to/keyfile.json"
|
74
|
-
# )
|
75
|
-
#
|
76
|
-
# speech.project_id #=> "my-project"
|
77
|
-
#
|
78
|
-
def project_id
|
79
|
-
service.project
|
80
|
-
end
|
81
|
-
alias project project_id
|
82
|
-
|
83
|
-
##
|
84
|
-
# Returns a new Audio instance from the given source. No API call is
|
85
|
-
# made.
|
86
|
-
#
|
87
|
-
# @see https://cloud.google.com/speech/docs/basics#audio-encodings
|
88
|
-
# Audio Encodings
|
89
|
-
# @see https://cloud.google.com/speech/docs/basics#sample-rates
|
90
|
-
# Sample Rates
|
91
|
-
# @see https://cloud.google.com/speech/docs/basics#languages
|
92
|
-
# Languages
|
93
|
-
#
|
94
|
-
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
95
|
-
# the path to the audio file to be recognized, or a File or other IO
|
96
|
-
# object of the audio contents, or a Cloud Storage URI of the form
|
97
|
-
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
98
|
-
# Google::Cloud::Storage::File of the audio to be recognized.
|
99
|
-
# @param [String, Symbol] encoding Encoding of audio data to be
|
100
|
-
# recognized. Optional.
|
101
|
-
#
|
102
|
-
# Acceptable values are:
|
103
|
-
#
|
104
|
-
# * `linear16` - Uncompressed 16-bit signed little-endian samples.
|
105
|
-
# (LINEAR16)
|
106
|
-
# * `flac` - The [Free Lossless Audio
|
107
|
-
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
108
|
-
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
109
|
-
# are supported. (FLAC)
|
110
|
-
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
111
|
-
# G.711 PCMU/mu-law. (MULAW)
|
112
|
-
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
113
|
-
# be 8000 Hz.) (AMR)
|
114
|
-
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
115
|
-
# be 16000 Hz.) (AMR_WB)
|
116
|
-
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
117
|
-
#
|
118
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
119
|
-
# speech transcription.
|
120
|
-
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
121
|
-
#
|
122
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
123
|
-
# speech transcription. If you must use a low-bitrate encoder,
|
124
|
-
# OGG_OPUS is preferred.
|
125
|
-
#
|
126
|
-
# @param [String,Symbol] language The language of the supplied audio as
|
127
|
-
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
128
|
-
# "en-US" for English (United States), "en-GB" for English (United
|
129
|
-
# Kingdom), "fr-FR" for French (France). See [Language
|
130
|
-
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
131
|
-
# of the currently supported language codes. Optional.
|
132
|
-
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
133
|
-
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
134
|
-
# For best results, set the sampling rate of the audio source to 16000
|
135
|
-
# Hz. If that's not possible, use the native sample rate of the audio
|
136
|
-
# source (instead of re-sampling). Optional.
|
137
|
-
#
|
138
|
-
# @return [Audio] The audio file to be recognized.
|
139
|
-
#
|
140
|
-
# @example
|
141
|
-
# require "google/cloud/speech"
|
142
|
-
#
|
143
|
-
# speech = Google::Cloud::Speech.new
|
144
|
-
#
|
145
|
-
# audio = speech.audio "path/to/audio.raw",
|
146
|
-
# encoding: :linear16,
|
147
|
-
# language: "en-US",
|
148
|
-
# sample_rate: 16000
|
149
|
-
#
|
150
|
-
# @example With a Google Cloud Storage URI:
|
151
|
-
# require "google/cloud/speech"
|
152
|
-
#
|
153
|
-
# speech = Google::Cloud::Speech.new
|
154
|
-
#
|
155
|
-
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
156
|
-
# encoding: :linear16,
|
157
|
-
# language: "en-US",
|
158
|
-
# sample_rate: 16000
|
159
|
-
#
|
160
|
-
# @example With a Google Cloud Storage File object:
|
161
|
-
# require "google/cloud/storage"
|
162
|
-
#
|
163
|
-
# storage = Google::Cloud::Storage.new
|
164
|
-
#
|
165
|
-
# bucket = storage.bucket "bucket-name"
|
166
|
-
# file = bucket.file "path/to/audio.raw"
|
167
|
-
#
|
168
|
-
# require "google/cloud/speech"
|
169
|
-
#
|
170
|
-
# speech = Google::Cloud::Speech.new
|
171
|
-
#
|
172
|
-
# audio = speech.audio file,
|
173
|
-
# encoding: :linear16,
|
174
|
-
# language: "en-US",
|
175
|
-
# sample_rate: 16000
|
176
|
-
#
|
177
|
-
def audio source, encoding: nil, language: nil, sample_rate: nil
|
178
|
-
audio = if source.is_a? Audio
|
179
|
-
source.dup
|
180
|
-
else
|
181
|
-
Audio.from_source source, self
|
182
|
-
end
|
183
|
-
audio.encoding = encoding unless encoding.nil?
|
184
|
-
audio.language = language unless language.nil?
|
185
|
-
audio.sample_rate = sample_rate unless sample_rate.nil?
|
186
|
-
audio
|
187
|
-
end
|
188
|
-
|
189
|
-
##
|
190
|
-
# Performs synchronous speech recognition. Sends audio data to the
|
191
|
-
# Speech API, which performs recognition on that data, and returns
|
192
|
-
# results only after all audio has been processed. Limited to audio data
|
193
|
-
# of 1 minute or less in duration.
|
194
|
-
#
|
195
|
-
# The Speech API will take roughly the same amount of time to process
|
196
|
-
# audio data sent synchronously as the duration of the supplied audio
|
197
|
-
# data. That is, if you send audio data of 30 seconds in length, expect
|
198
|
-
# the synchronous request to take approximately 30 seconds to return
|
199
|
-
# results.
|
200
|
-
#
|
201
|
-
# @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
|
202
|
-
# Synchronous Speech API Recognition
|
203
|
-
# @see https://cloud.google.com/speech/docs/basics#phrase-hints
|
204
|
-
# Phrase Hints
|
205
|
-
#
|
206
|
-
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
207
|
-
# the path to the audio file to be recognized, or a File or other IO
|
208
|
-
# object of the audio contents, or a Cloud Storage URI of the form
|
209
|
-
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
210
|
-
# Google::Cloud::Storage::File of the audio to be recognized.
|
211
|
-
# @param [String, Symbol] encoding Encoding of audio data to be
|
212
|
-
# recognized. Optional.
|
213
|
-
#
|
214
|
-
# Acceptable values are:
|
215
|
-
#
|
216
|
-
# * `linear16` - Uncompressed 16-bit signed little-endian samples.
|
217
|
-
# (LINEAR16)
|
218
|
-
# * `flac` - The [Free Lossless Audio
|
219
|
-
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
220
|
-
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
221
|
-
# are supported. (FLAC)
|
222
|
-
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
223
|
-
# G.711 PCMU/mu-law. (MULAW)
|
224
|
-
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
225
|
-
# be 8000 Hz.) (AMR)
|
226
|
-
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
227
|
-
# be 16000 Hz.) (AMR_WB)
|
228
|
-
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
229
|
-
#
|
230
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
231
|
-
# speech transcription.
|
232
|
-
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
233
|
-
#
|
234
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
235
|
-
# speech transcription. If you must use a low-bitrate encoder,
|
236
|
-
# OGG_OPUS is preferred.
|
237
|
-
#
|
238
|
-
# @param [String,Symbol] language The language of the supplied audio as
|
239
|
-
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
240
|
-
# "en-US" for English (United States), "en-GB" for English (United
|
241
|
-
# Kingdom), "fr-FR" for French (France). See [Language
|
242
|
-
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
243
|
-
# of the currently supported language codes. Optional.
|
244
|
-
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
245
|
-
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
246
|
-
# For best results, set the sampling rate of the audio source to 16000
|
247
|
-
# Hz. If that's not possible, use the native sample rate of the audio
|
248
|
-
# source (instead of re-sampling). Optional.
|
249
|
-
# @param [Integer] max_alternatives The maximum number of recognition
|
250
|
-
# hypotheses to be returned. Default is 1. The service may return
|
251
|
-
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
252
|
-
# @param [Boolean] profanity_filter When `true`, the service will
|
253
|
-
# attempt to filter out profanities, replacing all but the initial
|
254
|
-
# character in each filtered word with asterisks, e.g. "f***". Default
|
255
|
-
# is `false`.
|
256
|
-
# @param [Array<String>] phrases A list of strings containing words and
|
257
|
-
# phrases "hints" so that the speech recognition is more likely to
|
258
|
-
# recognize them. See [usage
|
259
|
-
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
260
|
-
# @param [Boolean] words When `true`, return a list of words with
|
261
|
-
# additional information about each word. Currently, the only
|
262
|
-
# additional information provided is the start and end time
|
263
|
-
# offsets. See {Result#words}. Default is `false`.
|
264
|
-
#
|
265
|
-
# @return [Array<Result>] The transcribed text of audio recognized.
|
266
|
-
#
|
267
|
-
# @example
|
268
|
-
# require "google/cloud/speech"
|
269
|
-
#
|
270
|
-
# speech = Google::Cloud::Speech.new
|
271
|
-
#
|
272
|
-
# results = speech.recognize "path/to/audio.raw",
|
273
|
-
# encoding: :linear16,
|
274
|
-
# language: "en-US",
|
275
|
-
# sample_rate: 16000
|
276
|
-
#
|
277
|
-
# @example With a Google Cloud Storage URI:
|
278
|
-
# require "google/cloud/speech"
|
279
|
-
#
|
280
|
-
# speech = Google::Cloud::Speech.new
|
281
|
-
#
|
282
|
-
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
283
|
-
# encoding: :linear16,
|
284
|
-
# language: "en-US",
|
285
|
-
# sample_rate: 16000
|
286
|
-
#
|
287
|
-
# @example With a Google Cloud Storage File object:
|
288
|
-
# require "google/cloud/storage"
|
289
|
-
#
|
290
|
-
# storage = Google::Cloud::Storage.new
|
291
|
-
#
|
292
|
-
# bucket = storage.bucket "bucket-name"
|
293
|
-
# file = bucket.file "path/to/audio.raw"
|
294
|
-
#
|
295
|
-
# require "google/cloud/speech"
|
296
|
-
#
|
297
|
-
# speech = Google::Cloud::Speech.new
|
298
|
-
#
|
299
|
-
# results = speech.recognize file,
|
300
|
-
# encoding: :linear16,
|
301
|
-
# language: "en-US",
|
302
|
-
# sample_rate: 16000,
|
303
|
-
# max_alternatives: 10
|
304
|
-
#
|
305
|
-
def recognize source, encoding: nil, language: nil, sample_rate: nil,
|
306
|
-
max_alternatives: nil, profanity_filter: nil,
|
307
|
-
phrases: nil, words: nil
|
308
|
-
ensure_service!
|
309
|
-
|
310
|
-
audio_obj = audio source, encoding: encoding, language: language,
|
311
|
-
sample_rate: sample_rate
|
312
|
-
|
313
|
-
config = audio_config(
|
314
|
-
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
315
|
-
language: audio_obj.language, max_alternatives: max_alternatives,
|
316
|
-
profanity_filter: profanity_filter, phrases: phrases,
|
317
|
-
words: words
|
318
|
-
)
|
319
|
-
|
320
|
-
grpc = service.recognize_sync audio_obj.to_grpc, config
|
321
|
-
grpc.results.map do |result_grpc|
|
322
|
-
Result.from_grpc result_grpc
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
##
|
327
|
-
# Performs asynchronous speech recognition. Requests are processed
|
328
|
-
# asynchronously, meaning an Operation is returned once the audio data
|
329
|
-
# has been sent, and can be refreshed to retrieve recognition results
|
330
|
-
# once the audio data has been processed.
|
331
|
-
#
|
332
|
-
# @see https://cloud.google.com/speech/docs/basics#async-responses
|
333
|
-
# Asynchronous Speech API Responses
|
334
|
-
#
|
335
|
-
# @param [String, IO, Google::Cloud::Storage::File] source A string of
|
336
|
-
# the path to the audio file to be recognized, or a File or other IO
|
337
|
-
# object of the audio contents, or a Cloud Storage URI of the form
|
338
|
-
# `"gs://bucketname/path/to/document.ext"`; or an instance of
|
339
|
-
# Google::Cloud::Storage::File of the audio to be recognized.
|
340
|
-
# @param [String, Symbol] encoding Encoding of audio data to be
|
341
|
-
# recognized. Optional.
|
342
|
-
#
|
343
|
-
# Acceptable values are:
|
344
|
-
#
|
345
|
-
# * `linear16` - Uncompressed 16-bit signed little-endian samples.
|
346
|
-
# (LINEAR16)
|
347
|
-
# * `flac` - The [Free Lossless Audio
|
348
|
-
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
349
|
-
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
350
|
-
# are supported. (FLAC)
|
351
|
-
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
352
|
-
# G.711 PCMU/mu-law. (MULAW)
|
353
|
-
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
354
|
-
# be 8000 Hz.) (AMR)
|
355
|
-
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
356
|
-
# be 16000 Hz.) (AMR_WB)
|
357
|
-
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
358
|
-
#
|
359
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
360
|
-
# speech transcription.
|
361
|
-
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
362
|
-
#
|
363
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
364
|
-
# speech transcription. If you must use a low-bitrate encoder,
|
365
|
-
# OGG_OPUS is preferred.
|
366
|
-
#
|
367
|
-
# @param [String,Symbol] language The language of the supplied audio as
|
368
|
-
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
369
|
-
# "en-US" for English (United States), "en-GB" for English (United
|
370
|
-
# Kingdom), "fr-FR" for French (France). See [Language
|
371
|
-
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
372
|
-
# of the currently supported language codes. Optional.
|
373
|
-
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
374
|
-
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
375
|
-
# For best results, set the sampling rate of the audio source to 16000
|
376
|
-
# Hz. If that's not possible, use the native sample rate of the audio
|
377
|
-
# source (instead of re-sampling). Optional.
|
378
|
-
# @param [Integer] max_alternatives The maximum number of recognition
|
379
|
-
# hypotheses to be returned. Default is 1. The service may return
|
380
|
-
# fewer. Valid values are 0-30. Defaults to 1. Optional.
|
381
|
-
# @param [Boolean] profanity_filter When `true`, the service will
|
382
|
-
# attempt to filter out profanities, replacing all but the initial
|
383
|
-
# character in each filtered word with asterisks, e.g. "f***". Default
|
384
|
-
# is `false`.
|
385
|
-
# @param [Array<String>] phrases A list of strings containing words and
|
386
|
-
# phrases "hints" so that the speech recognition is more likely to
|
387
|
-
# recognize them. See [usage
|
388
|
-
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
389
|
-
# @param [Boolean] words When `true`, return a list of words with
|
390
|
-
# additional information about each word. Currently, the only
|
391
|
-
# additional information provided is the start and end time
|
392
|
-
# offsets. See {Result#words}. Default is `false`.
|
393
|
-
#
|
394
|
-
# @return [Operation] A resource that represents the long-running,
|
395
|
-
# asynchronous processing of a speech-recognition operation.
|
396
|
-
#
|
397
|
-
# @example
|
398
|
-
# require "google/cloud/speech"
|
399
|
-
#
|
400
|
-
# speech = Google::Cloud::Speech.new
|
401
|
-
#
|
402
|
-
# op = speech.process "path/to/audio.raw",
|
403
|
-
# encoding: :linear16,
|
404
|
-
# language: "en-US",
|
405
|
-
# sample_rate: 16000
|
406
|
-
#
|
407
|
-
# op.done? #=> false
|
408
|
-
# op.reload!
|
409
|
-
#
|
410
|
-
# @example With a Google Cloud Storage URI:
|
411
|
-
# require "google/cloud/speech"
|
412
|
-
#
|
413
|
-
# speech = Google::Cloud::Speech.new
|
414
|
-
#
|
415
|
-
# op = speech.process "gs://bucket-name/path/to/audio.raw",
|
416
|
-
# encoding: :linear16,
|
417
|
-
# language: "en-US",
|
418
|
-
# sample_rate: 16000
|
419
|
-
#
|
420
|
-
# op.done? #=> false
|
421
|
-
# op.reload!
|
422
|
-
#
|
423
|
-
# @example With a Google Cloud Storage File object:
|
424
|
-
# require "google/cloud/storage"
|
425
|
-
#
|
426
|
-
# storage = Google::Cloud::Storage.new
|
427
|
-
#
|
428
|
-
# bucket = storage.bucket "bucket-name"
|
429
|
-
# file = bucket.file "path/to/audio.raw"
|
430
|
-
#
|
431
|
-
# require "google/cloud/speech"
|
432
|
-
#
|
433
|
-
# speech = Google::Cloud::Speech.new
|
434
|
-
#
|
435
|
-
# op = speech.process file,
|
436
|
-
# encoding: :linear16,
|
437
|
-
# language: "en-US",
|
438
|
-
# sample_rate: 16000,
|
439
|
-
# max_alternatives: 10
|
440
|
-
#
|
441
|
-
# op.done? #=> false
|
442
|
-
# op.reload!
|
443
|
-
#
|
444
|
-
def process source, encoding: nil, sample_rate: nil, language: nil,
|
445
|
-
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
446
|
-
words: nil
|
447
|
-
ensure_service!
|
448
|
-
|
449
|
-
audio_obj = audio source, encoding: encoding, language: language,
|
450
|
-
sample_rate: sample_rate
|
451
|
-
|
452
|
-
config = audio_config(
|
453
|
-
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
454
|
-
language: audio_obj.language, max_alternatives: max_alternatives,
|
455
|
-
profanity_filter: profanity_filter, phrases: phrases,
|
456
|
-
words: words
|
457
|
-
)
|
458
|
-
|
459
|
-
grpc = service.recognize_async audio_obj.to_grpc, config
|
460
|
-
Operation.from_grpc grpc
|
461
|
-
end
|
462
|
-
alias long_running_recognize process
|
463
|
-
alias recognize_job process
|
464
|
-
|
465
|
-
##
|
466
|
-
# Creates a Stream object to perform bidirectional streaming
|
467
|
-
# speech-recognition: receive results while sending audio.
|
468
|
-
#
|
469
|
-
# @see https://cloud.google.com/speech/docs/basics#streaming-recognition
|
470
|
-
# Streaming Speech API Recognition Requests
|
471
|
-
#
|
472
|
-
# @param [String, Symbol] encoding Encoding of audio data to be
|
473
|
-
# recognized. Optional.
|
474
|
-
#
|
475
|
-
# Acceptable values are:
|
476
|
-
#
|
477
|
-
# * `linear16` - Uncompressed 16-bit signed little-endian samples.
|
478
|
-
# (LINEAR16)
|
479
|
-
# * `flac` - The [Free Lossless Audio
|
480
|
-
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
481
|
-
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
482
|
-
# are supported. (FLAC)
|
483
|
-
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
484
|
-
# G.711 PCMU/mu-law. (MULAW)
|
485
|
-
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
486
|
-
# be 8000 Hz.) (AMR)
|
487
|
-
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
488
|
-
# be 16000 Hz.) (AMR_WB)
|
489
|
-
# * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
|
490
|
-
#
|
491
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
492
|
-
# speech transcription.
|
493
|
-
# * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
|
494
|
-
#
|
495
|
-
# Lossy codecs are not recommended, as they result in a lower-quality
|
496
|
-
# speech transcription. If you must use a low-bitrate encoder,
|
497
|
-
# OGG_OPUS is preferred.
|
498
|
-
#
|
499
|
-
# @param [String,Symbol] language The language of the supplied audio as
|
500
|
-
# a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
|
501
|
-
# "en-US" for English (United States), "en-GB" for English (United
|
502
|
-
# Kingdom), "fr-FR" for French (France). See [Language
|
503
|
-
# Support](https://cloud.google.com/speech/docs/languages) for a list
|
504
|
-
# of the currently supported language codes. Optional.
|
505
|
-
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
506
|
-
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
507
|
-
# For best results, set the sampling rate of the audio source to 16000
|
508
|
-
# Hz. If that's not possible, use the native sample rate of the audio
|
509
|
-
# source (instead of re-sampling). Optional.
|
510
|
-
# @param [Integer] max_alternatives The maximum number of recognition
|
511
|
-
# hypotheses to be returned. Default is 1. The service may return
|
512
|
-
# fewer. Valid values are 0-30. Optional.
|
513
|
-
# @param [Boolean] profanity_filter When `true`, the service will
|
514
|
-
# attempt to filter out profanities, replacing all but the initial
|
515
|
-
# character in each filtered word with asterisks, e.g. "f***". Default
|
516
|
-
# is `false`.
|
517
|
-
# @param [Array<String>] phrases A list of strings containing words and
|
518
|
-
# phrases "hints" so that the speech recognition is more likely to
|
519
|
-
# recognize them. See [usage
|
520
|
-
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
521
|
-
# @param [Boolean] words When `true`, return a list of words with
|
522
|
-
# additional information about each word. Currently, the only
|
523
|
-
# additional information provided is the start and end time
|
524
|
-
# offsets. See {Result#words}. Default is `false`.
|
525
|
-
# @param [Boolean] utterance When `true`, the service will perform
|
526
|
-
# continuous recognition (continuing to process audio even if the user
|
527
|
-
# pauses speaking) until the client closes the output stream (gRPC
|
528
|
-
# API) or when the maximum time limit has been reached. Default is
|
529
|
-
# `false`.
|
530
|
-
# @param [Boolean] interim When `true`, interim results (tentative
|
531
|
-
# hypotheses) may be returned as they become available. Default is
|
532
|
-
# `false`.
|
533
|
-
#
|
534
|
-
# @return [Stream] A resource that represents the streaming requests and
|
535
|
-
# responses.
|
536
|
-
#
|
537
|
-
# @example
|
538
|
-
# require "google/cloud/speech"
|
539
|
-
#
|
540
|
-
# speech = Google::Cloud::Speech.new
|
541
|
-
#
|
542
|
-
# stream = speech.stream encoding: :linear16,
|
543
|
-
# language: "en-US",
|
544
|
-
# sample_rate: 16000
|
545
|
-
#
|
546
|
-
# # Stream 5 seconds of audio from the microphone
|
547
|
-
# # Actual implementation of microphone input varies by platform
|
548
|
-
# 5.times do
|
549
|
-
# stream.send MicrophoneInput.read(32000)
|
550
|
-
# end
|
551
|
-
#
|
552
|
-
# stream.stop
|
553
|
-
# stream.wait_until_complete!
|
554
|
-
#
|
555
|
-
# results = stream.results
|
556
|
-
# result = results.first
|
557
|
-
# result.transcript #=> "how old is the Brooklyn Bridge"
|
558
|
-
# result.confidence #=> 0.9826789498329163
|
559
|
-
#
|
560
|
-
def stream encoding: nil, language: nil, sample_rate: nil,
|
561
|
-
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
562
|
-
words: nil, utterance: nil, interim: nil
|
563
|
-
ensure_service!
|
564
|
-
|
565
|
-
grpc_req = V1::StreamingRecognizeRequest.new(
|
566
|
-
streaming_config: V1::StreamingRecognitionConfig.new(
|
567
|
-
{
|
568
|
-
config: audio_config(encoding: convert_encoding(encoding),
|
569
|
-
language: language,
|
570
|
-
sample_rate: sample_rate,
|
571
|
-
max_alternatives: max_alternatives,
|
572
|
-
profanity_filter: profanity_filter,
|
573
|
-
phrases: phrases, words: words),
|
574
|
-
single_utterance: utterance,
|
575
|
-
interim_results: interim
|
576
|
-
}.delete_if { |_, v| v.nil? }
|
577
|
-
)
|
578
|
-
)
|
579
|
-
|
580
|
-
Stream.new service, grpc_req
|
581
|
-
end
|
582
|
-
alias stream_recognize stream
|
583
|
-
|
584
|
-
##
|
585
|
-
# Retrieves an asynchronous speech-recognition operation by id. Requests are processed
|
586
|
-
# asynchronously, meaning an Operation is returned once the audio data
|
587
|
-
# has been sent, and can be refreshed to retrieve recognition results
|
588
|
-
# once the audio data has been processed.
|
589
|
-
#
|
590
|
-
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
|
591
|
-
# Long-running Operation
|
592
|
-
#
|
593
|
-
# @param [String] id The unique identifier for the long running
|
594
|
-
# operation. Required.
|
595
|
-
#
|
596
|
-
# @return [Operation] A resource that represents the long-running,
|
597
|
-
# asynchronous processing of a speech-recognition operation.
|
598
|
-
#
|
599
|
-
# @example
|
600
|
-
# require "google/cloud/speech"
|
601
|
-
#
|
602
|
-
# speech = Google::Cloud::Speech.new
|
603
|
-
#
|
604
|
-
# op = speech.operation "1234567890"
|
605
|
-
#
|
606
|
-
# op.done? #=> false
|
607
|
-
# op.reload!
|
608
|
-
#
|
609
|
-
def operation id
|
610
|
-
ensure_service!
|
611
|
-
|
612
|
-
grpc = service.get_op id
|
613
|
-
Operation.from_grpc grpc
|
614
|
-
end
|
615
|
-
|
616
|
-
protected
|
617
|
-
|
618
|
-
def audio_config encoding: nil, language: nil, sample_rate: nil,
|
619
|
-
max_alternatives: nil, profanity_filter: nil,
|
620
|
-
phrases: nil, words: nil
|
621
|
-
contexts = nil
|
622
|
-
contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
|
623
|
-
language = String(language) unless language.nil?
|
624
|
-
V1::RecognitionConfig.new({
|
625
|
-
encoding: convert_encoding(encoding),
|
626
|
-
language_code: language,
|
627
|
-
sample_rate_hertz: sample_rate,
|
628
|
-
max_alternatives: max_alternatives,
|
629
|
-
profanity_filter: profanity_filter,
|
630
|
-
speech_contexts: contexts,
|
631
|
-
enable_word_time_offsets: words
|
632
|
-
}.delete_if { |_, v| v.nil? })
|
633
|
-
end
|
634
|
-
|
635
|
-
def convert_encoding encoding
|
636
|
-
mapping = { linear: :LINEAR16, linear16: :LINEAR16,
|
637
|
-
flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB,
|
638
|
-
ogg_opus: :OGG_OPUS, speex: :SPEEX_WITH_HEADER_BYTE }
|
639
|
-
mapping[encoding] || encoding
|
640
|
-
end
|
641
|
-
|
642
|
-
##
|
643
|
-
# @private Raise an error unless an active connection to the service is
|
644
|
-
# available.
|
645
|
-
def ensure_service!
|
646
|
-
raise "Must have active connection to service" unless service
|
647
|
-
end
|
648
|
-
end
|
649
|
-
end
|
650
|
-
end
|
651
|
-
end
|