google-cloud-speech 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -1
  3. data/LICENSE +1 -1
  4. data/README.md +69 -43
  5. data/lib/google/cloud/speech.rb +94 -252
  6. data/lib/google/cloud/speech/v1.rb +11 -1
  7. data/lib/google/cloud/speech/v1/cloud_speech_services_pb.rb +1 -1
  8. data/lib/google/cloud/speech/{version.rb → v1/credentials.rb} +12 -2
  9. data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +2 -2
  10. data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +92 -0
  11. data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +1 -1
  12. data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +1 -1
  13. data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +1 -1
  14. data/lib/google/cloud/speech/v1/doc/overview.rb +1 -1
  15. data/lib/google/cloud/speech/v1/helpers.rb +93 -0
  16. data/lib/google/cloud/speech/v1/speech_client.rb +26 -49
  17. data/lib/google/cloud/speech/v1/speech_client_config.json +5 -5
  18. data/lib/google/cloud/speech/v1/stream.rb +614 -0
  19. data/lib/google/cloud/speech/v1p1beta1.rb +126 -0
  20. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_pb.rb +175 -0
  21. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_services_pb.rb +54 -0
  22. data/lib/google/cloud/speech/v1p1beta1/credentials.rb +32 -0
  23. data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +625 -0
  24. data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +92 -0
  25. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +124 -0
  26. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +90 -0
  27. data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +83 -0
  28. data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +73 -0
  29. data/lib/google/cloud/speech/v1p1beta1/helpers.rb +93 -0
  30. data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +322 -0
  31. data/lib/google/cloud/speech/v1p1beta1/speech_client_config.json +41 -0
  32. data/lib/google/cloud/speech/v1p1beta1/stream.rb +614 -0
  33. metadata +29 -120
  34. data/lib/google-cloud-speech.rb +0 -142
  35. data/lib/google/cloud/speech/audio.rb +0 -330
  36. data/lib/google/cloud/speech/convert.rb +0 -46
  37. data/lib/google/cloud/speech/credentials.rb +0 -57
  38. data/lib/google/cloud/speech/operation.rb +0 -262
  39. data/lib/google/cloud/speech/project.rb +0 -651
  40. data/lib/google/cloud/speech/result.rb +0 -240
  41. data/lib/google/cloud/speech/service.rb +0 -121
  42. data/lib/google/cloud/speech/stream.rb +0 -564
@@ -1,46 +0,0 @@
1
- # Copyright 2017 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "google/protobuf/duration_pb"
17
-
18
- module Google
19
- module Cloud
20
- module Speech
21
- ##
22
- # @private Helper module for converting Speech values.
23
- module Convert
24
- module ClassMethods
25
- def number_to_duration number
26
- return nil if number.nil?
27
-
28
- Google::Protobuf::Duration.new \
29
- seconds: number.to_i,
30
- nanos: (number.remainder(1) * 1000000000).round
31
- end
32
-
33
- def duration_to_number duration
34
- return nil if duration.nil?
35
-
36
- return duration.seconds if duration.nanos.zero?
37
-
38
- duration.seconds + (duration.nanos / 1000000000.0)
39
- end
40
- end
41
-
42
- extend ClassMethods
43
- end
44
- end
45
- end
46
- end
@@ -1,57 +0,0 @@
1
- # Copyright 2016 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "googleauth"
17
-
18
- module Google
19
- module Cloud
20
- module Speech
21
- ##
22
- # # Credentials
23
- #
24
- # Represents the authentication and authorization used to connect to the
25
- # Speech API.
26
- #
27
- # @example
28
- # require "google/cloud/speech"
29
- #
30
- # keyfile = "/path/to/keyfile.json"
31
- # creds = Google::Cloud::Speech::Credentials.new keyfile
32
- #
33
- # speech = Google::Cloud::Speech.new(
34
- # project_id: "my-project",
35
- # credentials: creds
36
- # )
37
- #
38
- # speech.project_id #=> "my-project"
39
- #
40
- class Credentials < Google::Auth::Credentials
41
- SCOPE = ["https://www.googleapis.com/auth/cloud-platform"].freeze
42
- PATH_ENV_VARS = %w[SPEECH_CREDENTIALS
43
- SPEECH_KEYFILE
44
- GOOGLE_CLOUD_CREDENTIALS
45
- GOOGLE_CLOUD_KEYFILE
46
- GCLOUD_KEYFILE].freeze
47
- JSON_ENV_VARS = %w[SPEECH_CREDENTIALS_JSON
48
- SPEECH_KEYFILE_JSON
49
- GOOGLE_CLOUD_CREDENTIALS_JSON
50
- GOOGLE_CLOUD_KEYFILE_JSON
51
- GCLOUD_KEYFILE_JSON].freeze
52
- DEFAULT_PATHS = \
53
- ["~/.config/gcloud/application_default_credentials.json"].freeze
54
- end
55
- end
56
- end
57
- end
@@ -1,262 +0,0 @@
1
- # Copyright 2016 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "google/cloud/speech/v1"
17
- require "google/cloud/errors"
18
-
19
- module Google
20
- module Cloud
21
- module Speech
22
- ##
23
- # # Operation
24
- #
25
- # A resource that represents the long-running, asynchronous processing of a
26
- # speech-recognition operation. The op can be refreshed to retrieve
27
- # recognition results once the audio data has been processed.
28
- #
29
- # See {Project#process} and {Audio#process}.
30
- #
31
- # @see https://cloud.google.com/speech/docs/basics#async-responses
32
- # Asynchronous Speech API Responses
33
- # @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
34
- # Long-running Operation
35
- #
36
- # @example
37
- # require "google/cloud/speech"
38
- #
39
- # speech = Google::Cloud::Speech.new
40
- #
41
- # op = speech.process "path/to/audio.raw",
42
- # encoding: :linear16,
43
- # language: "en-US",
44
- # sample_rate: 16000
45
- #
46
- # op.done? #=> false
47
- # op.reload! # API call
48
- # op.done? #=> true
49
- # results = op.results
50
- #
51
- class Operation
52
- ##
53
- # @private The Google::Gax::Operation gRPC object.
54
- attr_accessor :grpc
55
-
56
- ##
57
- # @private Creates a new Operation instance.
58
- def initialize
59
- @grpc = nil
60
- end
61
-
62
- ##
63
- # The unique identifier for the long running operation.
64
- #
65
- # @return [String] The unique identifier for the long running operation.
66
- #
67
- # @example
68
- # require "google/cloud/speech"
69
- #
70
- # speech = Google::Cloud::Speech.new
71
- #
72
- # op = speech.process "path/to/audio.raw",
73
- # encoding: :linear16,
74
- # language: "en-US",
75
- # sample_rate: 16000
76
- #
77
- # op.id #=> "1234567890"
78
- #
79
- def id
80
- @grpc.name
81
- end
82
-
83
- ##
84
- # Checks if the speech-recognition processing of the audio data is
85
- # complete.
86
- #
87
- # @return [boolean] `true` when complete, `false` otherwise.
88
- #
89
- # @example
90
- # require "google/cloud/speech"
91
- #
92
- # speech = Google::Cloud::Speech.new
93
- #
94
- # op = speech.process "path/to/audio.raw",
95
- # encoding: :linear16,
96
- # language: "en-US",
97
- # sample_rate: 16000
98
- #
99
- # op.done? #=> false
100
- #
101
- def done?
102
- @grpc.done?
103
- end
104
-
105
- ##
106
- # A speech recognition result corresponding to a portion of the audio.
107
- #
108
- # @return [Array<Result>] The transcribed text of audio recognized. If
109
- # the op is not done this will return `nil`.
110
- #
111
- # @example
112
- # require "google/cloud/speech"
113
- #
114
- # speech = Google::Cloud::Speech.new
115
- #
116
- # op = speech.process "path/to/audio.raw",
117
- # encoding: :linear16,
118
- # language: "en-US",
119
- # sample_rate: 16000
120
- #
121
- # op.done? #=> true
122
- # op.results? #=> true
123
- # results = op.results
124
- #
125
- def results
126
- return nil unless results?
127
- @grpc.response.results.map do |result_grpc|
128
- Result.from_grpc result_grpc
129
- end
130
- end
131
-
132
- ##
133
- # Checks if the speech-recognition processing of the audio data has
134
- # produced a response, i.e. whether results are available.
135
- #
136
- # @return [boolean] `true` when results are available, `false` otherwise.
137
- #
138
- # @example
139
- # require "google/cloud/speech"
140
- #
141
- # speech = Google::Cloud::Speech.new
142
- #
143
- # op = speech.process "path/to/audio.raw",
144
- # encoding: :linear16,
145
- # language: "en-US",
146
- # sample_rate: 16000
147
- #
148
- # op.done? #=> true
149
- # op.results? #=> true
150
- # results = op.results
151
- #
152
- def results?
153
- @grpc.response?
154
- end
155
-
156
- ##
157
- # The error information if the speech-recognition processing of the
158
- # audio data has returned an error.
159
- #
160
- # @return [Google::Cloud::Error] The error.
161
- #
162
- # @example
163
- # require "google/cloud/speech"
164
- #
165
- # speech = Google::Cloud::Speech.new
166
- #
167
- # op = speech.process "path/to/audio.raw",
168
- # encoding: :linear16,
169
- # language: "en-US",
170
- # sample_rate: 16000
171
- #
172
- # op.done? #=> true
173
- # op.error? #=> true
174
- # error = op.error
175
- #
176
- def error
177
- return nil unless error?
178
- Google::Cloud::Error.from_error @grpc.error
179
- end
180
-
181
- ##
182
- # Checks if the speech-recognition processing of the audio data has
183
- # returned an error.
184
- #
185
- # @return [boolean] `true` when errored, `false` otherwise.
186
- #
187
- # @example
188
- # require "google/cloud/speech"
189
- #
190
- # speech = Google::Cloud::Speech.new
191
- #
192
- # op = speech.process "path/to/audio.raw",
193
- # encoding: :linear16,
194
- # language: "en-US",
195
- # sample_rate: 16000
196
- #
197
- # op.done? #=> true
198
- # op.error? #=> true
199
- # error = op.error
200
- #
201
- def error?
202
- @grpc.error?
203
- end
204
-
205
- ##
206
- # Reloads the op with current data from the long-running, asynchronous
207
- # processing of a speech-recognition operation.
208
- #
209
- # @example
210
- # require "google/cloud/speech"
211
- #
212
- # speech = Google::Cloud::Speech.new
213
- #
214
- # op = speech.process "path/to/audio.raw",
215
- # encoding: :linear16,
216
- # language: "en-US",
217
- # sample_rate: 16000
218
- #
219
- # op.done? #=> false
220
- # op.reload! # API call
221
- # op.done? #=> true
222
- #
223
- def reload!
224
- @grpc.reload!
225
- self
226
- end
227
- alias refresh! reload!
228
-
229
- ##
230
- # Reloads the op until the operation is complete. The delay between
231
- # reloads will incrementally increase.
232
- #
233
- # @example
234
- # require "google/cloud/speech"
235
- #
236
- # speech = Google::Cloud::Speech.new
237
- #
238
- # op = speech.process "path/to/audio.raw",
239
- # encoding: :linear16,
240
- # language: "en-US",
241
- # sample_rate: 16000
242
- #
243
- # op.done? #=> false
244
- # op.wait_until_done!
245
- # op.done? #=> true
246
- #
247
- def wait_until_done!
248
- @grpc.wait_until_done!
249
- end
250
-
251
- ##
252
- # @private New Operation from a Google::Gax::Operation
253
- # object.
254
- def self.from_grpc grpc
255
- new.tap do |job|
256
- job.instance_variable_set :@grpc, grpc
257
- end
258
- end
259
- end
260
- end
261
- end
262
- end
@@ -1,651 +0,0 @@
1
- # Copyright 2016 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "google/cloud/errors"
17
- require "google/cloud/speech/service"
18
- require "google/cloud/speech/audio"
19
- require "google/cloud/speech/result"
20
- require "google/cloud/speech/operation"
21
- require "google/cloud/speech/stream"
22
-
23
- module Google
24
- module Cloud
25
- module Speech
26
- ##
27
- # # Project
28
- #
29
- # The Google Cloud Speech API enables developers to convert audio to text
30
- # by applying powerful neural network models. The API recognizes over 80
31
- # languages and variants, to support your global user base. You can
32
- # transcribe the text of users dictating to an application's microphone,
33
- # enable command-and-control through voice, or transcribe audio files,
34
- # among many other use cases. Recognize audio uploaded in the request, and
35
- # integrate with your audio storage on Google Cloud Storage, by using the
36
- # same technology Google uses to power its own products.
37
- #
38
- # See {Google::Cloud#speech}
39
- #
40
- # @example
41
- # require "google/cloud/speech"
42
- #
43
- # speech = Google::Cloud::Speech.new
44
- #
45
- # audio = speech.audio "path/to/audio.raw",
46
- # encoding: :linear16,
47
- # language: "en-US",
48
- # sample_rate: 16000
49
- # results = audio.recognize
50
- #
51
- # result = results.first
52
- # result.transcript #=> "how old is the Brooklyn Bridge"
53
- # result.confidence #=> 0.9826789498329163
54
- #
55
- class Project
56
- ##
57
- # @private The gRPC Service object.
58
- attr_accessor :service
59
-
60
- ##
61
- # @private Creates a new Speech Project instance.
62
- def initialize service
63
- @service = service
64
- end
65
-
66
- # The Speech project connected to.
67
- #
68
- # @example
69
- # require "google/cloud/speech"
70
- #
71
- # speech = Google::Cloud::Speech.new(
72
- # project_id: "my-project",
73
- # credentials: "/path/to/keyfile.json"
74
- # )
75
- #
76
- # speech.project_id #=> "my-project"
77
- #
78
- def project_id
79
- service.project
80
- end
81
- alias project project_id
82
-
83
- ##
84
- # Returns a new Audio instance from the given source. No API call is
85
- # made.
86
- #
87
- # @see https://cloud.google.com/speech/docs/basics#audio-encodings
88
- # Audio Encodings
89
- # @see https://cloud.google.com/speech/docs/basics#sample-rates
90
- # Sample Rates
91
- # @see https://cloud.google.com/speech/docs/basics#languages
92
- # Languages
93
- #
94
- # @param [String, IO, Google::Cloud::Storage::File] source A string of
95
- # the path to the audio file to be recognized, or a File or other IO
96
- # object of the audio contents, or a Cloud Storage URI of the form
97
- # `"gs://bucketname/path/to/document.ext"`; or an instance of
98
- # Google::Cloud::Storage::File of the audio to be recognized.
99
- # @param [String, Symbol] encoding Encoding of audio data to be
100
- # recognized. Optional.
101
- #
102
- # Acceptable values are:
103
- #
104
- # * `linear16` - Uncompressed 16-bit signed little-endian samples.
105
- # (LINEAR16)
106
- # * `flac` - The [Free Lossless Audio
107
- # Codec](http://flac.sourceforge.net/documentation.html) encoding.
108
- # Only 16-bit samples are supported. Not all fields in STREAMINFO
109
- # are supported. (FLAC)
110
- # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
111
- # G.711 PCMU/mu-law. (MULAW)
112
- # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
113
- # be 8000 Hz.) (AMR)
114
- # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
115
- # be 16000 Hz.) (AMR_WB)
116
- # * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
117
- #
118
- # Lossy codecs are not recommended, as they result in a lower-quality
119
- # speech transcription.
120
- # * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
121
- #
122
- # Lossy codecs are not recommended, as they result in a lower-quality
123
- # speech transcription. If you must use a low-bitrate encoder,
124
- # OGG_OPUS is preferred.
125
- #
126
- # @param [String,Symbol] language The language of the supplied audio as
127
- # a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
128
- # "en-US" for English (United States), "en-GB" for English (United
129
- # Kingdom), "fr-FR" for French (France). See [Language
130
- # Support](https://cloud.google.com/speech/docs/languages) for a list
131
- # of the currently supported language codes. Optional.
132
- # @param [Integer] sample_rate Sample rate in Hertz of the audio data
133
- # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
134
- # For best results, set the sampling rate of the audio source to 16000
135
- # Hz. If that's not possible, use the native sample rate of the audio
136
- # source (instead of re-sampling). Optional.
137
- #
138
- # @return [Audio] The audio file to be recognized.
139
- #
140
- # @example
141
- # require "google/cloud/speech"
142
- #
143
- # speech = Google::Cloud::Speech.new
144
- #
145
- # audio = speech.audio "path/to/audio.raw",
146
- # encoding: :linear16,
147
- # language: "en-US",
148
- # sample_rate: 16000
149
- #
150
- # @example With a Google Cloud Storage URI:
151
- # require "google/cloud/speech"
152
- #
153
- # speech = Google::Cloud::Speech.new
154
- #
155
- # audio = speech.audio "gs://bucket-name/path/to/audio.raw",
156
- # encoding: :linear16,
157
- # language: "en-US",
158
- # sample_rate: 16000
159
- #
160
- # @example With a Google Cloud Storage File object:
161
- # require "google/cloud/storage"
162
- #
163
- # storage = Google::Cloud::Storage.new
164
- #
165
- # bucket = storage.bucket "bucket-name"
166
- # file = bucket.file "path/to/audio.raw"
167
- #
168
- # require "google/cloud/speech"
169
- #
170
- # speech = Google::Cloud::Speech.new
171
- #
172
- # audio = speech.audio file,
173
- # encoding: :linear16,
174
- # language: "en-US",
175
- # sample_rate: 16000
176
- #
177
- def audio source, encoding: nil, language: nil, sample_rate: nil
178
- audio = if source.is_a? Audio
179
- source.dup
180
- else
181
- Audio.from_source source, self
182
- end
183
- audio.encoding = encoding unless encoding.nil?
184
- audio.language = language unless language.nil?
185
- audio.sample_rate = sample_rate unless sample_rate.nil?
186
- audio
187
- end
188
-
189
- ##
190
- # Performs synchronous speech recognition. Sends audio data to the
191
- # Speech API, which performs recognition on that data, and returns
192
- # results only after all audio has been processed. Limited to audio data
193
- # of 1 minute or less in duration.
194
- #
195
- # The Speech API will take roughly the same amount of time to process
196
- # audio data sent synchronously as the duration of the supplied audio
197
- # data. That is, if you send audio data of 30 seconds in length, expect
198
- # the synchronous request to take approximately 30 seconds to return
199
- # results.
200
- #
201
- # @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
202
- # Synchronous Speech API Recognition
203
- # @see https://cloud.google.com/speech/docs/basics#phrase-hints
204
- # Phrase Hints
205
- #
206
- # @param [String, IO, Google::Cloud::Storage::File] source A string of
207
- # the path to the audio file to be recognized, or a File or other IO
208
- # object of the audio contents, or a Cloud Storage URI of the form
209
- # `"gs://bucketname/path/to/document.ext"`; or an instance of
210
- # Google::Cloud::Storage::File of the audio to be recognized.
211
- # @param [String, Symbol] encoding Encoding of audio data to be
212
- # recognized. Optional.
213
- #
214
- # Acceptable values are:
215
- #
216
- # * `linear16` - Uncompressed 16-bit signed little-endian samples.
217
- # (LINEAR16)
218
- # * `flac` - The [Free Lossless Audio
219
- # Codec](http://flac.sourceforge.net/documentation.html) encoding.
220
- # Only 16-bit samples are supported. Not all fields in STREAMINFO
221
- # are supported. (FLAC)
222
- # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
223
- # G.711 PCMU/mu-law. (MULAW)
224
- # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
225
- # be 8000 Hz.) (AMR)
226
- # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
227
- # be 16000 Hz.) (AMR_WB)
228
- # * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
229
- #
230
- # Lossy codecs are not recommended, as they result in a lower-quality
231
- # speech transcription.
232
- # * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
233
- #
234
- # Lossy codecs are not recommended, as they result in a lower-quality
235
- # speech transcription. If you must use a low-bitrate encoder,
236
- # OGG_OPUS is preferred.
237
- #
238
- # @param [String,Symbol] language The language of the supplied audio as
239
- # a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
240
- # "en-US" for English (United States), "en-GB" for English (United
241
- # Kingdom), "fr-FR" for French (France). See [Language
242
- # Support](https://cloud.google.com/speech/docs/languages) for a list
243
- # of the currently supported language codes. Optional.
244
- # @param [Integer] sample_rate Sample rate in Hertz of the audio data
245
- # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
246
- # For best results, set the sampling rate of the audio source to 16000
247
- # Hz. If that's not possible, use the native sample rate of the audio
248
- # source (instead of re-sampling). Optional.
249
- # @param [Integer] max_alternatives The maximum number of recognition
250
- # hypotheses to be returned. Default is 1. The service may return
251
- # fewer. Valid values are 0-30. Defaults to 1. Optional.
252
- # @param [Boolean] profanity_filter When `true`, the service will
253
- # attempt to filter out profanities, replacing all but the initial
254
- # character in each filtered word with asterisks, e.g. "f***". Default
255
- # is `false`.
256
- # @param [Array<String>] phrases A list of strings containing words and
257
- # phrases "hints" so that the speech recognition is more likely to
258
- # recognize them. See [usage
259
- # limits](https://cloud.google.com/speech/limits#content). Optional.
260
- # @param [Boolean] words When `true`, return a list of words with
261
- # additional information about each word. Currently, the only
262
- # additional information provided is the start and end time
263
- # offsets. See {Result#words}. Default is `false`.
264
- #
265
- # @return [Array<Result>] The transcribed text of audio recognized.
266
- #
267
- # @example
268
- # require "google/cloud/speech"
269
- #
270
- # speech = Google::Cloud::Speech.new
271
- #
272
- # results = speech.recognize "path/to/audio.raw",
273
- # encoding: :linear16,
274
- # language: "en-US",
275
- # sample_rate: 16000
276
- #
277
- # @example With a Google Cloud Storage URI:
278
- # require "google/cloud/speech"
279
- #
280
- # speech = Google::Cloud::Speech.new
281
- #
282
- # results = speech.recognize "gs://bucket-name/path/to/audio.raw",
283
- # encoding: :linear16,
284
- # language: "en-US",
285
- # sample_rate: 16000
286
- #
287
- # @example With a Google Cloud Storage File object:
288
- # require "google/cloud/storage"
289
- #
290
- # storage = Google::Cloud::Storage.new
291
- #
292
- # bucket = storage.bucket "bucket-name"
293
- # file = bucket.file "path/to/audio.raw"
294
- #
295
- # require "google/cloud/speech"
296
- #
297
- # speech = Google::Cloud::Speech.new
298
- #
299
- # results = speech.recognize file,
300
- # encoding: :linear16,
301
- # language: "en-US",
302
- # sample_rate: 16000,
303
- # max_alternatives: 10
304
- #
305
- def recognize source, encoding: nil, language: nil, sample_rate: nil,
306
- max_alternatives: nil, profanity_filter: nil,
307
- phrases: nil, words: nil
308
- ensure_service!
309
-
310
- audio_obj = audio source, encoding: encoding, language: language,
311
- sample_rate: sample_rate
312
-
313
- config = audio_config(
314
- encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
315
- language: audio_obj.language, max_alternatives: max_alternatives,
316
- profanity_filter: profanity_filter, phrases: phrases,
317
- words: words
318
- )
319
-
320
- grpc = service.recognize_sync audio_obj.to_grpc, config
321
- grpc.results.map do |result_grpc|
322
- Result.from_grpc result_grpc
323
- end
324
- end
325
-
326
- ##
327
- # Performs asynchronous speech recognition. Requests are processed
328
- # asynchronously, meaning an Operation is returned once the audio data
329
- # has been sent, and can be refreshed to retrieve recognition results
330
- # once the audio data has been processed.
331
- #
332
- # @see https://cloud.google.com/speech/docs/basics#async-responses
333
- # Asynchronous Speech API Responses
334
- #
335
- # @param [String, IO, Google::Cloud::Storage::File] source A string of
336
- # the path to the audio file to be recognized, or a File or other IO
337
- # object of the audio contents, or a Cloud Storage URI of the form
338
- # `"gs://bucketname/path/to/document.ext"`; or an instance of
339
- # Google::Cloud::Storage::File of the audio to be recognized.
340
- # @param [String, Symbol] encoding Encoding of audio data to be
341
- # recognized. Optional.
342
- #
343
- # Acceptable values are:
344
- #
345
- # * `linear16` - Uncompressed 16-bit signed little-endian samples.
346
- # (LINEAR16)
347
- # * `flac` - The [Free Lossless Audio
348
- # Codec](http://flac.sourceforge.net/documentation.html) encoding.
349
- # Only 16-bit samples are supported. Not all fields in STREAMINFO
350
- # are supported. (FLAC)
351
- # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
352
- # G.711 PCMU/mu-law. (MULAW)
353
- # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
354
- # be 8000 Hz.) (AMR)
355
- # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
356
- # be 16000 Hz.) (AMR_WB)
357
- # * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
358
- #
359
- # Lossy codecs are not recommended, as they result in a lower-quality
360
- # speech transcription.
361
- # * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
362
- #
363
- # Lossy codecs are not recommended, as they result in a lower-quality
364
- # speech transcription. If you must use a low-bitrate encoder,
365
- # OGG_OPUS is preferred.
366
- #
367
- # @param [String,Symbol] language The language of the supplied audio as
368
- # a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
369
- # "en-US" for English (United States), "en-GB" for English (United
370
- # Kingdom), "fr-FR" for French (France). See [Language
371
- # Support](https://cloud.google.com/speech/docs/languages) for a list
372
- # of the currently supported language codes. Optional.
373
- # @param [Integer] sample_rate Sample rate in Hertz of the audio data
374
- # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
375
- # For best results, set the sampling rate of the audio source to 16000
376
- # Hz. If that's not possible, use the native sample rate of the audio
377
- # source (instead of re-sampling). Optional.
378
- # @param [Integer] max_alternatives The maximum number of recognition
379
- # hypotheses to be returned. Default is 1. The service may return
380
- # fewer. Valid values are 0-30. Defaults to 1. Optional.
381
- # @param [Boolean] profanity_filter When `true`, the service will
382
- # attempt to filter out profanities, replacing all but the initial
383
- # character in each filtered word with asterisks, e.g. "f***". Default
384
- # is `false`.
385
- # @param [Array<String>] phrases A list of strings containing words and
386
- # phrases "hints" so that the speech recognition is more likely to
387
- # recognize them. See [usage
388
- # limits](https://cloud.google.com/speech/limits#content). Optional.
389
- # @param [Boolean] words When `true`, return a list of words with
390
- # additional information about each word. Currently, the only
391
- # additional information provided is the start and end time
392
- # offsets. See {Result#words}. Default is `false`.
393
- #
394
- # @return [Operation] A resource that represents the long-running,
395
- # asynchronous processing of a speech-recognition operation.
396
- #
397
- # @example
398
- # require "google/cloud/speech"
399
- #
400
- # speech = Google::Cloud::Speech.new
401
- #
402
- # op = speech.process "path/to/audio.raw",
403
- # encoding: :linear16,
404
- # language: "en-US",
405
- # sample_rate: 16000
406
- #
407
- # op.done? #=> false
408
- # op.reload!
409
- #
410
- # @example With a Google Cloud Storage URI:
411
- # require "google/cloud/speech"
412
- #
413
- # speech = Google::Cloud::Speech.new
414
- #
415
- # op = speech.process "gs://bucket-name/path/to/audio.raw",
416
- # encoding: :linear16,
417
- # language: "en-US",
418
- # sample_rate: 16000
419
- #
420
- # op.done? #=> false
421
- # op.reload!
422
- #
423
- # @example With a Google Cloud Storage File object:
424
- # require "google/cloud/storage"
425
- #
426
- # storage = Google::Cloud::Storage.new
427
- #
428
- # bucket = storage.bucket "bucket-name"
429
- # file = bucket.file "path/to/audio.raw"
430
- #
431
- # require "google/cloud/speech"
432
- #
433
- # speech = Google::Cloud::Speech.new
434
- #
435
- # op = speech.process file,
436
- # encoding: :linear16,
437
- # language: "en-US",
438
- # sample_rate: 16000,
439
- # max_alternatives: 10
440
- #
441
- # op.done? #=> false
442
- # op.reload!
443
- #
444
- def process source, encoding: nil, sample_rate: nil, language: nil,
445
- max_alternatives: nil, profanity_filter: nil, phrases: nil,
446
- words: nil
447
- ensure_service!
448
-
449
- audio_obj = audio source, encoding: encoding, language: language,
450
- sample_rate: sample_rate
451
-
452
- config = audio_config(
453
- encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
454
- language: audio_obj.language, max_alternatives: max_alternatives,
455
- profanity_filter: profanity_filter, phrases: phrases,
456
- words: words
457
- )
458
-
459
- grpc = service.recognize_async audio_obj.to_grpc, config
460
- Operation.from_grpc grpc
461
- end
462
- alias long_running_recognize process
463
- alias recognize_job process
464
-
465
##
# Creates a Stream object to perform bidirectional streaming
# speech-recognition: receive results while sending audio.
#
# @see https://cloud.google.com/speech/docs/basics#streaming-recognition
#   Streaming Speech API Recognition Requests
#
# @param [String, Symbol] encoding Encoding of audio data to be
#   recognized. Optional.
#
#   Acceptable values are:
#
#   * `linear16` - Uncompressed 16-bit signed little-endian samples.
#     (LINEAR16)
#   * `flac` - The [Free Lossless Audio
#     Codec](http://flac.sourceforge.net/documentation.html) encoding.
#     Only 16-bit samples are supported. Not all fields in STREAMINFO
#     are supported. (FLAC)
#   * `mulaw` - 8-bit samples that compand 14-bit audio samples using
#     G.711 PCMU/mu-law. (MULAW)
#   * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
#     be 8000 Hz.) (AMR)
#   * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
#     be 16000 Hz.) (AMR_WB)
#   * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
#
#     Lossy codecs are not recommended, as they result in a lower-quality
#     speech transcription.
#   * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
#
#     Lossy codecs are not recommended, as they result in a lower-quality
#     speech transcription. If you must use a low-bitrate encoder,
#     OGG_OPUS is preferred.
#
# @param [String,Symbol] language The language of the supplied audio as
#   a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
#   "en-US" for English (United States), "en-GB" for English (United
#   Kingdom), "fr-FR" for French (France). See [Language
#   Support](https://cloud.google.com/speech/docs/languages) for a list
#   of the currently supported language codes. Optional.
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
#   to be recognized. Valid values are: 8000-48000. 16000 is optimal.
#   For best results, set the sampling rate of the audio source to 16000
#   Hz. If that's not possible, use the native sample rate of the audio
#   source (instead of re-sampling). Optional.
# @param [Integer] max_alternatives The maximum number of recognition
#   hypotheses to be returned. The service may return fewer. Valid
#   values are 0-30. Defaults to 1. Optional.
# @param [Boolean] profanity_filter When `true`, the service will
#   attempt to filter out profanities, replacing all but the initial
#   character in each filtered word with asterisks, e.g. "f***". Default
#   is `false`.
# @param [Array<String>] phrases A list of strings containing words and
#   phrases "hints" so that the speech recognition is more likely to
#   recognize them. See [usage
#   limits](https://cloud.google.com/speech/limits#content). Optional.
# @param [Boolean] words When `true`, return a list of words with
#   additional information about each word. Currently, the only
#   additional information provided is the start and end time
#   offsets. See {Result#words}. Default is `false`.
# @param [Boolean] utterance Sent to the service as the
#   `single_utterance` streaming option. NOTE(review): per the Speech API
#   reference, `true` asks the service to detect a single spoken
#   utterance and then stop recognizing, while `false` or omitted keeps
#   recognizing through pauses until the client closes the stream or the
#   maximum time limit is reached - confirm against the API reference.
#   Default is `false`.
# @param [Boolean] interim When `true`, interim results (tentative
#   hypotheses) may be returned as they become available. Default is
#   `false`.
#
# @return [Stream] A resource that represents the streaming requests and
#   responses.
#
# @example
#   require "google/cloud/speech"
#
#   speech = Google::Cloud::Speech.new
#
#   stream = speech.stream encoding: :linear16,
#                          language: "en-US",
#                          sample_rate: 16000
#
#   # Stream 5 seconds of audio from the microphone
#   # Actual implementation of microphone input varies by platform
#   5.times do
#     stream.send MicrophoneInput.read(32000)
#   end
#
#   stream.stop
#   stream.wait_until_complete!
#
#   results = stream.results
#   result = results.first
#   result.transcript #=> "how old is the Brooklyn Bridge"
#   result.confidence #=> 0.9826789498329163
#
def stream encoding: nil, language: nil, sample_rate: nil,
           max_alternatives: nil, profanity_filter: nil, phrases: nil,
           words: nil, utterance: nil, interim: nil
  ensure_service!

  # audio_config runs the encoding through convert_encoding itself, so the
  # raw value is handed over as-is instead of being converted twice.
  recognition_config = audio_config encoding: encoding,
                                    language: language,
                                    sample_rate: sample_rate,
                                    max_alternatives: max_alternatives,
                                    profanity_filter: profanity_filter,
                                    phrases: phrases, words: words

  # Options left as nil are dropped so they are not set on the message.
  streaming_options = {
    config: recognition_config,
    single_utterance: utterance,
    interim_results: interim
  }.delete_if { |_, v| v.nil? }

  grpc_req = V1::StreamingRecognizeRequest.new(
    streaming_config: V1::StreamingRecognitionConfig.new(streaming_options)
  )

  Stream.new service, grpc_req
end
alias stream_recognize stream
583
-
584
##
# Looks up the long-running operation with the given identifier through
# the service's `get_op` call and wraps the response in an Operation,
# which can then be checked and refreshed as shown in the example below.
#
# @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
#   Long-running Operation
#
# @param [String] id The unique identifier for the long-running
#   operation. Required.
#
# @return [Operation] A resource that represents the long-running,
#   asynchronous processing of a speech-recognition operation.
#
# @example
#   require "google/cloud/speech"
#
#   speech = Google::Cloud::Speech.new
#
#   op = speech.operation "1234567890"
#
#   op.done? #=> false
#   op.reload!
#
def operation(id)
  ensure_service!

  Operation.from_grpc(service.get_op(id))
end
615
-
616
- protected
617
-
618
##
# @private Builds a V1::RecognitionConfig from the given options. Options
# whose value is nil are left out of the message entirely.
#
# @param encoding Passed through convert_encoding before being set.
# @param language Coerced with String() unless nil; sent as
#   `language_code`.
# @param sample_rate Sent as `sample_rate_hertz`.
# @param max_alternatives Sent as `max_alternatives`.
# @param profanity_filter Sent as `profanity_filter`.
# @param phrases When truthy, wrapped in a single V1::SpeechContext and
#   sent as `speech_contexts`.
# @param words Sent as `enable_word_time_offsets`.
#
# @return [V1::RecognitionConfig]
def audio_config(encoding: nil, language: nil, sample_rate: nil,
                 max_alternatives: nil, profanity_filter: nil,
                 phrases: nil, words: nil)
  speech_contexts = phrases ? [V1::SpeechContext.new(phrases: phrases)] : nil
  language_code = language.nil? ? nil : String(language)

  settings = {
    encoding: convert_encoding(encoding),
    language_code: language_code,
    sample_rate_hertz: sample_rate,
    max_alternatives: max_alternatives,
    profanity_filter: profanity_filter,
    speech_contexts: speech_contexts,
    enable_word_time_offsets: words
  }

  # Only explicitly provided options are set on the message.
  V1::RecognitionConfig.new(settings.reject { |_, value| value.nil? })
end
634
-
635
##
# @private Translates a friendly encoding name into the upper-case symbol
# used for the RecognitionConfig `encoding` field.
#
# The lookup accepts Strings as well as Symbols and ignores case, so
# `:flac`, `"flac"` and `:FLAC` all yield `:FLAC`. Any value without a
# mapping (including nil) is returned unchanged.
#
# @param [String, Symbol, nil] encoding The encoding name to translate.
#
# @return [Symbol, Object] The mapped symbol, or `encoding` itself when
#   there is no mapping for it.
def convert_encoding encoding
  # String keys avoid creating symbols from caller-supplied values.
  mapping = { "linear" => :LINEAR16, "linear16" => :LINEAR16,
              "flac" => :FLAC, "mulaw" => :MULAW, "amr" => :AMR,
              "amr_wb" => :AMR_WB, "ogg_opus" => :OGG_OPUS,
              "speex" => :SPEEX_WITH_HEADER_BYTE }
  mapping[encoding.to_s.downcase] || encoding
end
641
-
642
##
# @private Guard called before talking to the service: raises a
# RuntimeError unless `service` is truthy, otherwise does nothing.
def ensure_service!
  return if service

  raise "Must have active connection to service"
end
648
- end
649
- end
650
- end
651
- end