google-cloud-speech 0.29.0 → 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -1
  3. data/LICENSE +1 -1
  4. data/README.md +69 -43
  5. data/lib/google/cloud/speech.rb +94 -252
  6. data/lib/google/cloud/speech/v1.rb +11 -1
  7. data/lib/google/cloud/speech/v1/cloud_speech_services_pb.rb +1 -1
  8. data/lib/google/cloud/speech/{version.rb → v1/credentials.rb} +12 -2
  9. data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +2 -2
  10. data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +92 -0
  11. data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +1 -1
  12. data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +1 -1
  13. data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +1 -1
  14. data/lib/google/cloud/speech/v1/doc/overview.rb +1 -1
  15. data/lib/google/cloud/speech/v1/helpers.rb +93 -0
  16. data/lib/google/cloud/speech/v1/speech_client.rb +26 -49
  17. data/lib/google/cloud/speech/v1/speech_client_config.json +5 -5
  18. data/lib/google/cloud/speech/v1/stream.rb +614 -0
  19. data/lib/google/cloud/speech/v1p1beta1.rb +126 -0
  20. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_pb.rb +175 -0
  21. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_services_pb.rb +54 -0
  22. data/lib/google/cloud/speech/v1p1beta1/credentials.rb +32 -0
  23. data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +625 -0
  24. data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +92 -0
  25. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +124 -0
  26. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +90 -0
  27. data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +83 -0
  28. data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +73 -0
  29. data/lib/google/cloud/speech/v1p1beta1/helpers.rb +93 -0
  30. data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +322 -0
  31. data/lib/google/cloud/speech/v1p1beta1/speech_client_config.json +41 -0
  32. data/lib/google/cloud/speech/v1p1beta1/stream.rb +614 -0
  33. metadata +29 -120
  34. data/lib/google-cloud-speech.rb +0 -142
  35. data/lib/google/cloud/speech/audio.rb +0 -330
  36. data/lib/google/cloud/speech/convert.rb +0 -46
  37. data/lib/google/cloud/speech/credentials.rb +0 -57
  38. data/lib/google/cloud/speech/operation.rb +0 -262
  39. data/lib/google/cloud/speech/project.rb +0 -651
  40. data/lib/google/cloud/speech/result.rb +0 -240
  41. data/lib/google/cloud/speech/service.rb +0 -121
  42. data/lib/google/cloud/speech/stream.rb +0 -564
@@ -1,46 +0,0 @@
1
- # Copyright 2017 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "google/protobuf/duration_pb"
17
-
18
- module Google
19
- module Cloud
20
- module Speech
21
- ##
22
- # @private Helper module for converting Speech values.
23
- module Convert
24
- module ClassMethods
25
- def number_to_duration number
26
- return nil if number.nil?
27
-
28
- Google::Protobuf::Duration.new \
29
- seconds: number.to_i,
30
- nanos: (number.remainder(1) * 1000000000).round
31
- end
32
-
33
- def duration_to_number duration
34
- return nil if duration.nil?
35
-
36
- return duration.seconds if duration.nanos.zero?
37
-
38
- duration.seconds + (duration.nanos / 1000000000.0)
39
- end
40
- end
41
-
42
- extend ClassMethods
43
- end
44
- end
45
- end
46
- end
@@ -1,57 +0,0 @@
1
- # Copyright 2016 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "googleauth"
17
-
18
- module Google
19
- module Cloud
20
- module Speech
21
- ##
22
- # # Credentials
23
- #
24
- # Represents the authentication and authorization used to connect to the
25
- # Speech API.
26
- #
27
- # @example
28
- # require "google/cloud/speech"
29
- #
30
- # keyfile = "/path/to/keyfile.json"
31
- # creds = Google::Cloud::Speech::Credentials.new keyfile
32
- #
33
- # speech = Google::Cloud::Speech.new(
34
- # project_id: "my-project",
35
- # credentials: creds
36
- # )
37
- #
38
- # speech.project_id #=> "my-project"
39
- #
40
- class Credentials < Google::Auth::Credentials
41
- SCOPE = ["https://www.googleapis.com/auth/cloud-platform"].freeze
42
- PATH_ENV_VARS = %w[SPEECH_CREDENTIALS
43
- SPEECH_KEYFILE
44
- GOOGLE_CLOUD_CREDENTIALS
45
- GOOGLE_CLOUD_KEYFILE
46
- GCLOUD_KEYFILE].freeze
47
- JSON_ENV_VARS = %w[SPEECH_CREDENTIALS_JSON
48
- SPEECH_KEYFILE_JSON
49
- GOOGLE_CLOUD_CREDENTIALS_JSON
50
- GOOGLE_CLOUD_KEYFILE_JSON
51
- GCLOUD_KEYFILE_JSON].freeze
52
- DEFAULT_PATHS = \
53
- ["~/.config/gcloud/application_default_credentials.json"].freeze
54
- end
55
- end
56
- end
57
- end
@@ -1,262 +0,0 @@
1
- # Copyright 2016 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "google/cloud/speech/v1"
17
- require "google/cloud/errors"
18
-
19
- module Google
20
- module Cloud
21
- module Speech
22
- ##
23
- # # Operation
24
- #
25
- # A resource that represents the long-running, asynchronous processing of a
26
- # speech-recognition operation. The op can be refreshed to retrieve
27
- # recognition results once the audio data has been processed.
28
- #
29
- # See {Project#process} and {Audio#process}.
30
- #
31
- # @see https://cloud.google.com/speech/docs/basics#async-responses
32
- # Asynchronous Speech API Responses
33
- # @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
34
- # Long-running Operation
35
- #
36
- # @example
37
- # require "google/cloud/speech"
38
- #
39
- # speech = Google::Cloud::Speech.new
40
- #
41
- # op = speech.process "path/to/audio.raw",
42
- # encoding: :linear16,
43
- # language: "en-US",
44
- # sample_rate: 16000
45
- #
46
- # op.done? #=> false
47
- # op.reload! # API call
48
- # op.done? #=> true
49
- # results = op.results
50
- #
51
- class Operation
52
- ##
53
- # @private The Google::Gax::Operation gRPC object.
54
- attr_accessor :grpc
55
-
56
- ##
57
- # @private Creates a new Operation instance.
58
- def initialize
59
- @grpc = nil
60
- end
61
-
62
- ##
63
- # The unique identifier for the long running operation.
64
- #
65
- # @return [String] The unique identifier for the long running operation.
66
- #
67
- # @example
68
- # require "google/cloud/speech"
69
- #
70
- # speech = Google::Cloud::Speech.new
71
- #
72
- # op = speech.process "path/to/audio.raw",
73
- # encoding: :linear16,
74
- # language: "en-US",
75
- # sample_rate: 16000
76
- #
77
- # op.id #=> "1234567890"
78
- #
79
- def id
80
- @grpc.name
81
- end
82
-
83
- ##
84
- # Checks if the speech-recognition processing of the audio data is
85
- # complete.
86
- #
87
- # @return [boolean] `true` when complete, `false` otherwise.
88
- #
89
- # @example
90
- # require "google/cloud/speech"
91
- #
92
- # speech = Google::Cloud::Speech.new
93
- #
94
- # op = speech.process "path/to/audio.raw",
95
- # encoding: :linear16,
96
- # language: "en-US",
97
- # sample_rate: 16000
98
- #
99
- # op.done? #=> false
100
- #
101
- def done?
102
- @grpc.done?
103
- end
104
-
105
- ##
106
- # A speech recognition result corresponding to a portion of the audio.
107
- #
108
- # @return [Array<Result>] The transcribed text of audio recognized. If
109
- # the op is not done this will return `nil`.
110
- #
111
- # @example
112
- # require "google/cloud/speech"
113
- #
114
- # speech = Google::Cloud::Speech.new
115
- #
116
- # op = speech.process "path/to/audio.raw",
117
- # encoding: :linear16,
118
- # language: "en-US",
119
- # sample_rate: 16000
120
- #
121
- # op.done? #=> true
122
- # op.results? #=> true
123
- # results = op.results
124
- #
125
- def results
126
- return nil unless results?
127
- @grpc.response.results.map do |result_grpc|
128
- Result.from_grpc result_grpc
129
- end
130
- end
131
-
132
- ##
133
- # Checks if the speech-recognition processing of the audio data has
134
- # produced a response containing results.
135
- #
136
- # @return [boolean] `true` when results are available, `false` otherwise.
137
- #
138
- # @example
139
- # require "google/cloud/speech"
140
- #
141
- # speech = Google::Cloud::Speech.new
142
- #
143
- # op = speech.process "path/to/audio.raw",
144
- # encoding: :linear16,
145
- # language: "en-US",
146
- # sample_rate: 16000
147
- #
148
- # op.done? #=> true
149
- # op.results? #=> true
150
- # results = op.results
151
- #
152
- def results?
153
- @grpc.response?
154
- end
155
-
156
- ##
157
- # The error information if the speech-recognition processing of the
158
- # audio data has returned an error.
159
- #
160
- # @return [Google::Cloud::Error] The error.
161
- #
162
- # @example
163
- # require "google/cloud/speech"
164
- #
165
- # speech = Google::Cloud::Speech.new
166
- #
167
- # op = speech.process "path/to/audio.raw",
168
- # encoding: :linear16,
169
- # language: "en-US",
170
- # sample_rate: 16000
171
- #
172
- # op.done? #=> true
173
- # op.error? #=> true
174
- # error = op.error
175
- #
176
- def error
177
- return nil unless error?
178
- Google::Cloud::Error.from_error @grpc.error
179
- end
180
-
181
- ##
182
- # Checks if the speech-recognition processing of the audio data has
183
- # returned an error.
184
- #
185
- # @return [boolean] `true` when errored, `false` otherwise.
186
- #
187
- # @example
188
- # require "google/cloud/speech"
189
- #
190
- # speech = Google::Cloud::Speech.new
191
- #
192
- # op = speech.process "path/to/audio.raw",
193
- # encoding: :linear16,
194
- # language: "en-US",
195
- # sample_rate: 16000
196
- #
197
- # op.done? #=> true
198
- # op.error? #=> true
199
- # error = op.error
200
- #
201
- def error?
202
- @grpc.error?
203
- end
204
-
205
- ##
206
- # Reloads the op with current data from the long-running, asynchronous
207
- # processing of a speech-recognition operation.
208
- #
209
- # @example
210
- # require "google/cloud/speech"
211
- #
212
- # speech = Google::Cloud::Speech.new
213
- #
214
- # op = speech.process "path/to/audio.raw",
215
- # encoding: :linear16,
216
- # language: "en-US",
217
- # sample_rate: 16000
218
- #
219
- # op.done? #=> false
220
- # op.reload! # API call
221
- # op.done? #=> true
222
- #
223
- def reload!
224
- @grpc.reload!
225
- self
226
- end
227
- alias refresh! reload!
228
-
229
- ##
230
- # Reloads the op until the operation is complete. The delay between
231
- # reloads will incrementally increase.
232
- #
233
- # @example
234
- # require "google/cloud/speech"
235
- #
236
- # speech = Google::Cloud::Speech.new
237
- #
238
- # op = speech.process "path/to/audio.raw",
239
- # encoding: :linear16,
240
- # language: "en-US",
241
- # sample_rate: 16000
242
- #
243
- # op.done? #=> false
244
- # op.wait_until_done!
245
- # op.done? #=> true
246
- #
247
- def wait_until_done!
248
- @grpc.wait_until_done!
249
- end
250
-
251
- ##
252
- # @private New Operation from a Google::Gax::Operation
253
- # object.
254
- def self.from_grpc grpc
255
- new.tap do |job|
256
- job.instance_variable_set :@grpc, grpc
257
- end
258
- end
259
- end
260
- end
261
- end
262
- end
@@ -1,651 +0,0 @@
1
- # Copyright 2016 Google LLC
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # https://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
-
16
- require "google/cloud/errors"
17
- require "google/cloud/speech/service"
18
- require "google/cloud/speech/audio"
19
- require "google/cloud/speech/result"
20
- require "google/cloud/speech/operation"
21
- require "google/cloud/speech/stream"
22
-
23
- module Google
24
- module Cloud
25
- module Speech
26
- ##
27
- # # Project
28
- #
29
- # The Google Cloud Speech API enables developers to convert audio to text
30
- # by applying powerful neural network models. The API recognizes over 80
31
- # languages and variants, to support your global user base. You can
32
- # transcribe the text of users dictating to an application's microphone,
33
- # enable command-and-control through voice, or transcribe audio files,
34
- # among many other use cases. Recognize audio uploaded in the request, and
35
- # integrate with your audio storage on Google Cloud Storage, by using the
36
- # same technology Google uses to power its own products.
37
- #
38
- # See {Google::Cloud#speech}
39
- #
40
- # @example
41
- # require "google/cloud/speech"
42
- #
43
- # speech = Google::Cloud::Speech.new
44
- #
45
- # audio = speech.audio "path/to/audio.raw",
46
- # encoding: :linear16,
47
- # language: "en-US",
48
- # sample_rate: 16000
49
- # results = audio.recognize
50
- #
51
- # result = results.first
52
- # result.transcript #=> "how old is the Brooklyn Bridge"
53
- # result.confidence #=> 0.9826789498329163
54
- #
55
- class Project
56
- ##
57
- # @private The gRPC Service object.
58
- attr_accessor :service
59
-
60
- ##
61
- # @private Creates a new Speech Project instance.
62
- def initialize service
63
- @service = service
64
- end
65
-
66
- # The Speech project connected to.
67
- #
68
- # @example
69
- # require "google/cloud/speech"
70
- #
71
- # speech = Google::Cloud::Speech.new(
72
- # project_id: "my-project",
73
- # credentials: "/path/to/keyfile.json"
74
- # )
75
- #
76
- # speech.project_id #=> "my-project"
77
- #
78
- def project_id
79
- service.project
80
- end
81
- alias project project_id
82
-
83
- ##
84
- # Returns a new Audio instance from the given source. No API call is
85
- # made.
86
- #
87
- # @see https://cloud.google.com/speech/docs/basics#audio-encodings
88
- # Audio Encodings
89
- # @see https://cloud.google.com/speech/docs/basics#sample-rates
90
- # Sample Rates
91
- # @see https://cloud.google.com/speech/docs/basics#languages
92
- # Languages
93
- #
94
- # @param [String, IO, Google::Cloud::Storage::File] source A string of
95
- # the path to the audio file to be recognized, or a File or other IO
96
- # object of the audio contents, or a Cloud Storage URI of the form
97
- # `"gs://bucketname/path/to/document.ext"`; or an instance of
98
- # Google::Cloud::Storage::File of the audio to be recognized.
99
- # @param [String, Symbol] encoding Encoding of audio data to be
100
- # recognized. Optional.
101
- #
102
- # Acceptable values are:
103
- #
104
- # * `linear16` - Uncompressed 16-bit signed little-endian samples.
105
- # (LINEAR16)
106
- # * `flac` - The [Free Lossless Audio
107
- # Codec](http://flac.sourceforge.net/documentation.html) encoding.
108
- # Only 16-bit samples are supported. Not all fields in STREAMINFO
109
- # are supported. (FLAC)
110
- # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
111
- # G.711 PCMU/mu-law. (MULAW)
112
- # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
113
- # be 8000 Hz.) (AMR)
114
- # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
115
- # be 16000 Hz.) (AMR_WB)
116
- # * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
117
- #
118
- # Lossy codecs are not recommended, as they result in a lower-quality
119
- # speech transcription.
120
- # * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
121
- #
122
- # Lossy codecs are not recommended, as they result in a lower-quality
123
- # speech transcription. If you must use a low-bitrate encoder,
124
- # OGG_OPUS is preferred.
125
- #
126
- # @param [String,Symbol] language The language of the supplied audio as
127
- # a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
128
- # "en-US" for English (United States), "en-GB" for English (United
129
- # Kingdom), "fr-FR" for French (France). See [Language
130
- # Support](https://cloud.google.com/speech/docs/languages) for a list
131
- # of the currently supported language codes. Optional.
132
- # @param [Integer] sample_rate Sample rate in Hertz of the audio data
133
- # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
134
- # For best results, set the sampling rate of the audio source to 16000
135
- # Hz. If that's not possible, use the native sample rate of the audio
136
- # source (instead of re-sampling). Optional.
137
- #
138
- # @return [Audio] The audio file to be recognized.
139
- #
140
- # @example
141
- # require "google/cloud/speech"
142
- #
143
- # speech = Google::Cloud::Speech.new
144
- #
145
- # audio = speech.audio "path/to/audio.raw",
146
- # encoding: :linear16,
147
- # language: "en-US",
148
- # sample_rate: 16000
149
- #
150
- # @example With a Google Cloud Storage URI:
151
- # require "google/cloud/speech"
152
- #
153
- # speech = Google::Cloud::Speech.new
154
- #
155
- # audio = speech.audio "gs://bucket-name/path/to/audio.raw",
156
- # encoding: :linear16,
157
- # language: "en-US",
158
- # sample_rate: 16000
159
- #
160
- # @example With a Google Cloud Storage File object:
161
- # require "google/cloud/storage"
162
- #
163
- # storage = Google::Cloud::Storage.new
164
- #
165
- # bucket = storage.bucket "bucket-name"
166
- # file = bucket.file "path/to/audio.raw"
167
- #
168
- # require "google/cloud/speech"
169
- #
170
- # speech = Google::Cloud::Speech.new
171
- #
172
- # audio = speech.audio file,
173
- # encoding: :linear16,
174
- # language: "en-US",
175
- # sample_rate: 16000
176
- #
177
- def audio source, encoding: nil, language: nil, sample_rate: nil
178
- audio = if source.is_a? Audio
179
- source.dup
180
- else
181
- Audio.from_source source, self
182
- end
183
- audio.encoding = encoding unless encoding.nil?
184
- audio.language = language unless language.nil?
185
- audio.sample_rate = sample_rate unless sample_rate.nil?
186
- audio
187
- end
188
-
189
- ##
190
- # Performs synchronous speech recognition. Sends audio data to the
191
- # Speech API, which performs recognition on that data, and returns
192
- # results only after all audio has been processed. Limited to audio data
193
- # of 1 minute or less in duration.
194
- #
195
- # The Speech API will take roughly the same amount of time to process
196
- # audio data sent synchronously as the duration of the supplied audio
197
- # data. That is, if you send audio data of 30 seconds in length, expect
198
- # the synchronous request to take approximately 30 seconds to return
199
- # results.
200
- #
201
- # @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
202
- # Synchronous Speech API Recognition
203
- # @see https://cloud.google.com/speech/docs/basics#phrase-hints
204
- # Phrase Hints
205
- #
206
- # @param [String, IO, Google::Cloud::Storage::File] source A string of
207
- # the path to the audio file to be recognized, or a File or other IO
208
- # object of the audio contents, or a Cloud Storage URI of the form
209
- # `"gs://bucketname/path/to/document.ext"`; or an instance of
210
- # Google::Cloud::Storage::File of the audio to be recognized.
211
- # @param [String, Symbol] encoding Encoding of audio data to be
212
- # recognized. Optional.
213
- #
214
- # Acceptable values are:
215
- #
216
- # * `linear16` - Uncompressed 16-bit signed little-endian samples.
217
- # (LINEAR16)
218
- # * `flac` - The [Free Lossless Audio
219
- # Codec](http://flac.sourceforge.net/documentation.html) encoding.
220
- # Only 16-bit samples are supported. Not all fields in STREAMINFO
221
- # are supported. (FLAC)
222
- # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
223
- # G.711 PCMU/mu-law. (MULAW)
224
- # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
225
- # be 8000 Hz.) (AMR)
226
- # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
227
- # be 16000 Hz.) (AMR_WB)
228
- # * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
229
- #
230
- # Lossy codecs are not recommended, as they result in a lower-quality
231
- # speech transcription.
232
- # * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
233
- #
234
- # Lossy codecs are not recommended, as they result in a lower-quality
235
- # speech transcription. If you must use a low-bitrate encoder,
236
- # OGG_OPUS is preferred.
237
- #
238
- # @param [String,Symbol] language The language of the supplied audio as
239
- # a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
240
- # "en-US" for English (United States), "en-GB" for English (United
241
- # Kingdom), "fr-FR" for French (France). See [Language
242
- # Support](https://cloud.google.com/speech/docs/languages) for a list
243
- # of the currently supported language codes. Optional.
244
- # @param [Integer] sample_rate Sample rate in Hertz of the audio data
245
- # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
246
- # For best results, set the sampling rate of the audio source to 16000
247
- # Hz. If that's not possible, use the native sample rate of the audio
248
- # source (instead of re-sampling). Optional.
249
- # @param [Integer] max_alternatives The maximum number of recognition
250
- # hypotheses to be returned. Default is 1. The service may return
251
- # fewer. Valid values are 0-30. Defaults to 1. Optional.
252
- # @param [Boolean] profanity_filter When `true`, the service will
253
- # attempt to filter out profanities, replacing all but the initial
254
- # character in each filtered word with asterisks, e.g. "f***". Default
255
- # is `false`.
256
- # @param [Array<String>] phrases A list of strings containing words and
257
- # phrases "hints" so that the speech recognition is more likely to
258
- # recognize them. See [usage
259
- # limits](https://cloud.google.com/speech/limits#content). Optional.
260
- # @param [Boolean] words When `true`, return a list of words with
261
- # additional information about each word. Currently, the only
262
- # additional information provided is the start and end time
263
- # offsets. See {Result#words}. Default is `false`.
264
- #
265
- # @return [Array<Result>] The transcribed text of audio recognized.
266
- #
267
- # @example
268
- # require "google/cloud/speech"
269
- #
270
- # speech = Google::Cloud::Speech.new
271
- #
272
- # results = speech.recognize "path/to/audio.raw",
273
- # encoding: :linear16,
274
- # language: "en-US",
275
- # sample_rate: 16000
276
- #
277
- # @example With a Google Cloud Storage URI:
278
- # require "google/cloud/speech"
279
- #
280
- # speech = Google::Cloud::Speech.new
281
- #
282
- # results = speech.recognize "gs://bucket-name/path/to/audio.raw",
283
- # encoding: :linear16,
284
- # language: "en-US",
285
- # sample_rate: 16000
286
- #
287
- # @example With a Google Cloud Storage File object:
288
- # require "google/cloud/storage"
289
- #
290
- # storage = Google::Cloud::Storage.new
291
- #
292
- # bucket = storage.bucket "bucket-name"
293
- # file = bucket.file "path/to/audio.raw"
294
- #
295
- # require "google/cloud/speech"
296
- #
297
- # speech = Google::Cloud::Speech.new
298
- #
299
- # results = speech.recognize file,
300
- # encoding: :linear16,
301
- # language: "en-US",
302
- # sample_rate: 16000,
303
- # max_alternatives: 10
304
- #
305
- def recognize source, encoding: nil, language: nil, sample_rate: nil,
306
- max_alternatives: nil, profanity_filter: nil,
307
- phrases: nil, words: nil
308
- ensure_service!
309
-
310
- audio_obj = audio source, encoding: encoding, language: language,
311
- sample_rate: sample_rate
312
-
313
- config = audio_config(
314
- encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
315
- language: audio_obj.language, max_alternatives: max_alternatives,
316
- profanity_filter: profanity_filter, phrases: phrases,
317
- words: words
318
- )
319
-
320
- grpc = service.recognize_sync audio_obj.to_grpc, config
321
- grpc.results.map do |result_grpc|
322
- Result.from_grpc result_grpc
323
- end
324
- end
325
-
326
- ##
327
- # Performs asynchronous speech recognition. Requests are processed
328
- # asynchronously, meaning an Operation is returned once the audio data
329
- # has been sent, and can be refreshed to retrieve recognition results
330
- # once the audio data has been processed.
331
- #
332
- # @see https://cloud.google.com/speech/docs/basics#async-responses
333
- # Asynchronous Speech API Responses
334
- #
335
- # @param [String, IO, Google::Cloud::Storage::File] source A string of
336
- # the path to the audio file to be recognized, or a File or other IO
337
- # object of the audio contents, or a Cloud Storage URI of the form
338
- # `"gs://bucketname/path/to/document.ext"`; or an instance of
339
- # Google::Cloud::Storage::File of the audio to be recognized.
340
- # @param [String, Symbol] encoding Encoding of audio data to be
341
- # recognized. Optional.
342
- #
343
- # Acceptable values are:
344
- #
345
- # * `linear16` - Uncompressed 16-bit signed little-endian samples.
346
- # (LINEAR16)
347
- # * `flac` - The [Free Lossless Audio
348
- # Codec](http://flac.sourceforge.net/documentation.html) encoding.
349
- # Only 16-bit samples are supported. Not all fields in STREAMINFO
350
- # are supported. (FLAC)
351
- # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
352
- # G.711 PCMU/mu-law. (MULAW)
353
- # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
354
- # be 8000 Hz.) (AMR)
355
- # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
356
- # be 16000 Hz.) (AMR_WB)
357
- # * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
358
- #
359
- # Lossy codecs are not recommended, as they result in a lower-quality
360
- # speech transcription.
361
- # * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
362
- #
363
- # Lossy codecs are not recommended, as they result in a lower-quality
364
- # speech transcription. If you must use a low-bitrate encoder,
365
- # OGG_OPUS is preferred.
366
- #
367
- # @param [String,Symbol] language The language of the supplied audio as
368
- # a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
369
- # "en-US" for English (United States), "en-GB" for English (United
370
- # Kingdom), "fr-FR" for French (France). See [Language
371
- # Support](https://cloud.google.com/speech/docs/languages) for a list
372
- # of the currently supported language codes. Optional.
373
- # @param [Integer] sample_rate Sample rate in Hertz of the audio data
374
- # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
375
- # For best results, set the sampling rate of the audio source to 16000
376
- # Hz. If that's not possible, use the native sample rate of the audio
377
- # source (instead of re-sampling). Optional.
378
- # @param [Integer] max_alternatives The maximum number of recognition
379
- # hypotheses to be returned. Default is 1. The service may return
380
- # fewer. Valid values are 0-30. Defaults to 1. Optional.
381
- # @param [Boolean] profanity_filter When `true`, the service will
382
- # attempt to filter out profanities, replacing all but the initial
383
- # character in each filtered word with asterisks, e.g. "f***". Default
384
- # is `false`.
385
- # @param [Array<String>] phrases A list of strings containing words and
386
- # phrases "hints" so that the speech recognition is more likely to
387
- # recognize them. See [usage
388
- # limits](https://cloud.google.com/speech/limits#content). Optional.
389
- # @param [Boolean] words When `true`, return a list of words with
390
- # additional information about each word. Currently, the only
391
- # additional information provided is the start and end time
392
- # offsets. See {Result#words}. Default is `false`.
393
- #
394
- # @return [Operation] A resource that represents the long-running,
395
- # asynchronous processing of a speech-recognition operation.
396
- #
397
- # @example
398
- # require "google/cloud/speech"
399
- #
400
- # speech = Google::Cloud::Speech.new
401
- #
402
- # op = speech.process "path/to/audio.raw",
403
- # encoding: :linear16,
404
- # language: "en-US",
405
- # sample_rate: 16000
406
- #
407
- # op.done? #=> false
408
- # op.reload!
409
- #
410
- # @example With a Google Cloud Storage URI:
411
- # require "google/cloud/speech"
412
- #
413
- # speech = Google::Cloud::Speech.new
414
- #
415
- # op = speech.process "gs://bucket-name/path/to/audio.raw",
416
- # encoding: :linear16,
417
- # language: "en-US",
418
- # sample_rate: 16000
419
- #
420
- # op.done? #=> false
421
- # op.reload!
422
- #
423
- # @example With a Google Cloud Storage File object:
424
- # require "google/cloud/storage"
425
- #
426
- # storage = Google::Cloud::Storage.new
427
- #
428
- # bucket = storage.bucket "bucket-name"
429
- # file = bucket.file "path/to/audio.raw"
430
- #
431
- # require "google/cloud/speech"
432
- #
433
- # speech = Google::Cloud::Speech.new
434
- #
435
- # op = speech.process file,
436
- # encoding: :linear16,
437
- # language: "en-US",
438
- # sample_rate: 16000,
439
- # max_alternatives: 10
440
- #
441
- # op.done? #=> false
442
- # op.reload!
443
- #
444
- def process source, encoding: nil, sample_rate: nil, language: nil,
445
- max_alternatives: nil, profanity_filter: nil, phrases: nil,
446
- words: nil
447
- ensure_service!
448
-
449
- audio_obj = audio source, encoding: encoding, language: language,
450
- sample_rate: sample_rate
451
-
452
- config = audio_config(
453
- encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
454
- language: audio_obj.language, max_alternatives: max_alternatives,
455
- profanity_filter: profanity_filter, phrases: phrases,
456
- words: words
457
- )
458
-
459
- grpc = service.recognize_async audio_obj.to_grpc, config
460
- Operation.from_grpc grpc
461
- end
462
- alias long_running_recognize process
463
- alias recognize_job process
464
-
465
- ##
466
- # Creates a Stream object to perform bidirectional streaming
467
- # speech-recognition: receive results while sending audio.
468
- #
469
- # @see https://cloud.google.com/speech/docs/basics#streaming-recognition
470
- # Streaming Speech API Recognition Requests
471
- #
472
- # @param [String, Symbol] encoding Encoding of audio data to be
473
- # recognized. Optional.
474
- #
475
- # Acceptable values are:
476
- #
477
- # * `linear16` - Uncompressed 16-bit signed little-endian samples.
478
- # (LINEAR16)
479
- # * `flac` - The [Free Lossless Audio
480
- # Codec](http://flac.sourceforge.net/documentation.html) encoding.
481
- # Only 16-bit samples are supported. Not all fields in STREAMINFO
482
- # are supported. (FLAC)
483
- # * `mulaw` - 8-bit samples that compand 14-bit audio samples using
484
- # G.711 PCMU/mu-law. (MULAW)
485
- # * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
486
- # be 8000 Hz.) (AMR)
487
- # * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
488
- # be 16000 Hz.) (AMR_WB)
489
- # * `ogg_opus` - Ogg Mapping for Opus. (OGG_OPUS)
490
- #
491
- # Lossy codecs are not recommended, as they result in a lower-quality
492
- # speech transcription.
493
- # * `speex` - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
494
- #
495
- # Lossy codecs are not recommended, as they result in a lower-quality
496
- # speech transcription. If you must use a low-bitrate encoder,
497
- # OGG_OPUS is preferred.
498
- #
499
- # @param [String,Symbol] language The language of the supplied audio as
500
- # a [BCP-47](https://tools.ietf.org/html/bcp47) language code. e.g.
501
- # "en-US" for English (United States), "en-GB" for English (United
502
- # Kingdom), "fr-FR" for French (France). See [Language
503
- # Support](https://cloud.google.com/speech/docs/languages) for a list
504
- # of the currently supported language codes. Optional.
505
- # @param [Integer] sample_rate Sample rate in Hertz of the audio data
506
- # to be recognized. Valid values are: 8000-48000. 16000 is optimal.
507
- # For best results, set the sampling rate of the audio source to 16000
508
- # Hz. If that's not possible, use the native sample rate of the audio
509
- # source (instead of re-sampling). Optional.
510
- # @param [Integer] max_alternatives The maximum number of recognition
511
- # hypotheses to be returned. Default is 1. The service may return
512
- # fewer. Valid values are 0-30. Optional.
513
- # @param [Boolean] profanity_filter When `true`, the service will
514
- # attempt to filter out profanities, replacing all but the initial
515
- # character in each filtered word with asterisks, e.g. "f***". Default
516
- # is `false`.
517
- # @param [Array<String>] phrases A list of strings containing words and
518
- # phrases "hints" so that the speech recognition is more likely to
519
- # recognize them. See [usage
520
- # limits](https://cloud.google.com/speech/limits#content). Optional.
521
- # @param [Boolean] words When `true`, return a list of words with
522
- # additional information about each word. Currently, the only
523
- # additional information provided is the start and end time
524
- # offsets. See {Result#words}. Default is `false`.
525
- # @param [Boolean] utterance When `true`, the service will perform
526
- # continuous recognition (continuing to process audio even if the user
527
- # pauses speaking) until the client closes the output stream (gRPC
528
- # API) or when the maximum time limit has been reached. Default is
529
- # `false`.
530
- # @param [Boolean] interim When `true`, interim results (tentative
531
- # hypotheses) may be returned as they become available. Default is
532
- # `false`.
533
- #
534
- # @return [Stream] A resource that represents the streaming requests and
535
- # responses.
536
- #
537
- # @example
538
- # require "google/cloud/speech"
539
- #
540
- # speech = Google::Cloud::Speech.new
541
- #
542
- # stream = speech.stream encoding: :linear16,
543
- # language: "en-US",
544
- # sample_rate: 16000
545
- #
546
- # # Stream 5 seconds of audio from the microphone
547
- # # Actual implementation of microphone input varies by platform
548
- # 5.times do
549
- # stream.send MicrophoneInput.read(32000)
550
- # end
551
- #
552
- # stream.stop
553
- # stream.wait_until_complete!
554
- #
555
- # results = stream.results
556
- # result = results.first
557
- # result.transcript #=> "how old is the Brooklyn Bridge"
558
- # result.confidence #=> 0.9826789498329163
559
- #
560
- def stream encoding: nil, language: nil, sample_rate: nil,
561
- max_alternatives: nil, profanity_filter: nil, phrases: nil,
562
- words: nil, utterance: nil, interim: nil
563
- ensure_service!
564
-
565
- grpc_req = V1::StreamingRecognizeRequest.new(
566
- streaming_config: V1::StreamingRecognitionConfig.new(
567
- {
568
- config: audio_config(encoding: convert_encoding(encoding),
569
- language: language,
570
- sample_rate: sample_rate,
571
- max_alternatives: max_alternatives,
572
- profanity_filter: profanity_filter,
573
- phrases: phrases, words: words),
574
- single_utterance: utterance,
575
- interim_results: interim
576
- }.delete_if { |_, v| v.nil? }
577
- )
578
- )
579
-
580
- Stream.new service, grpc_req
581
- end
582
- alias stream_recognize stream
583
-
584
- ##
585
- # Performs asynchronous speech recognition. Requests are processed
586
- # asynchronously, meaning an Operation is returned once the audio data
587
- # has been sent, and can be refreshed to retrieve recognition results
588
- # once the audio data has been processed.
589
- #
590
- # @see https://cloud.google.com/speech/reference/rpc/google.longrunning#google.longrunning.Operations
591
- # Long-running Operation
592
- #
593
- # @param [String] id The unique identifier for the long running
594
- # operation. Required.
595
- #
596
- # @return [Operation] A resource that represents the long-running,
597
- # asynchronous processing of a speech-recognition operation.
598
- #
599
- # @example
600
- # require "google/cloud/speech"
601
- #
602
- # speech = Google::Cloud::Speech.new
603
- #
604
- # op = speech.operation "1234567890"
605
- #
606
- # op.done? #=> false
607
- # op.reload!
608
- #
609
- def operation id
610
- ensure_service!
611
-
612
- grpc = service.get_op id
613
- Operation.from_grpc grpc
614
- end
615
-
616
- protected
617
-
618
- def audio_config encoding: nil, language: nil, sample_rate: nil,
619
- max_alternatives: nil, profanity_filter: nil,
620
- phrases: nil, words: nil
621
- contexts = nil
622
- contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
623
- language = String(language) unless language.nil?
624
- V1::RecognitionConfig.new({
625
- encoding: convert_encoding(encoding),
626
- language_code: language,
627
- sample_rate_hertz: sample_rate,
628
- max_alternatives: max_alternatives,
629
- profanity_filter: profanity_filter,
630
- speech_contexts: contexts,
631
- enable_word_time_offsets: words
632
- }.delete_if { |_, v| v.nil? })
633
- end
634
-
635
- def convert_encoding encoding
636
- mapping = { linear: :LINEAR16, linear16: :LINEAR16,
637
- flac: :FLAC, mulaw: :MULAW, amr: :AMR, amr_wb: :AMR_WB,
638
- ogg_opus: :OGG_OPUS, speex: :SPEEX_WITH_HEADER_BYTE }
639
- mapping[encoding] || encoding
640
- end
641
-
642
- ##
643
- # @private Raise an error unless an active connection to the service is
644
- # available.
645
- def ensure_service!
646
- raise "Must have active connection to service" unless service
647
- end
648
- end
649
- end
650
- end
651
- end