google-cloud-speech 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google/cloud/speech/audio.rb +13 -4
- data/lib/google/cloud/speech/convert.rb +46 -0
- data/lib/google/cloud/speech/project.rb +25 -8
- data/lib/google/cloud/speech/result.rb +44 -6
- data/lib/google/cloud/speech/v1.rb +68 -0
- data/lib/google/cloud/speech/v1/cloud_speech_pb.rb +8 -0
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +61 -29
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +2 -2
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +77 -0
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +19 -19
- data/lib/google/cloud/speech/v1/speech_client.rb +36 -39
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 454b1310e5e39c2b4ac676c26b6aea8a89f452c5
|
4
|
+
data.tar.gz: 12fa7c7129f20bdf92c5e61c3e1e294307937f1f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dca3bf30a3edad90e5dec095183a4a721491b2cef886ecc82fa4a83afe6fdc3c5e1387b336f9d0a96286b6605dc58cead675242e8d04338b2c44c17896c9b3fd
|
7
|
+
data.tar.gz: 6ce650dbbeb8cccac3a3e63c290b191ff3c92436d65cb1a1887d6e3480ca4707dfd326017186f3da278f913e70057258d03469df503f9fcd0ce6c29a944e4fb5
|
@@ -191,6 +191,10 @@ module Google
|
|
191
191
|
# phrases "hints" so that the speech recognition is more likely to
|
192
192
|
# recognize them. See [usage
|
193
193
|
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
194
|
+
# @param [Boolean] words When `true`, return a list of words with
|
195
|
+
# additional information about each word. Currently, the only
|
196
|
+
# additional information provided is the start and end time
|
197
|
+
# offsets. See {Result#words}. Default is `false`.
|
194
198
|
#
|
195
199
|
# @return [Array<Result>] The transcribed text of audio recognized.
|
196
200
|
#
|
@@ -209,14 +213,15 @@ module Google
|
|
209
213
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
210
214
|
# result.confidence #=> 0.9826789498329163
|
211
215
|
#
|
212
|
-
def recognize max_alternatives: nil, profanity_filter: nil,
|
216
|
+
def recognize max_alternatives: nil, profanity_filter: nil,
|
217
|
+
phrases: nil, words: nil
|
213
218
|
ensure_speech!
|
214
219
|
|
215
220
|
speech.recognize self, encoding: encoding, sample_rate: sample_rate,
|
216
221
|
language: language,
|
217
222
|
max_alternatives: max_alternatives,
|
218
223
|
profanity_filter: profanity_filter,
|
219
|
-
phrases: phrases
|
224
|
+
phrases: phrases, words: words
|
220
225
|
end
|
221
226
|
|
222
227
|
##
|
@@ -239,6 +244,10 @@ module Google
|
|
239
244
|
# phrases "hints" so that the speech recognition is more likely to
|
240
245
|
# recognize them. See [usage
|
241
246
|
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
247
|
+
# @param [Boolean] words When `true`, return a list of words with
|
248
|
+
# additional information about each word. Currently, the only
|
249
|
+
# additional information provided is the start and end time
|
250
|
+
# offsets. See {Result#words}. Default is `false`.
|
242
251
|
#
|
243
252
|
# @return [Operation] A resource represents the long-running,
|
244
253
|
# asynchronous processing of a speech-recognition operation.
|
@@ -260,7 +269,7 @@ module Google
|
|
260
269
|
# results = op.results
|
261
270
|
#
|
262
271
|
def process max_alternatives: nil, profanity_filter: nil,
|
263
|
-
phrases: nil
|
272
|
+
phrases: nil, words: nil
|
264
273
|
ensure_speech!
|
265
274
|
|
266
275
|
speech.process self, encoding: encoding,
|
@@ -268,7 +277,7 @@ module Google
|
|
268
277
|
language: language,
|
269
278
|
max_alternatives: max_alternatives,
|
270
279
|
profanity_filter: profanity_filter,
|
271
|
-
phrases: phrases
|
280
|
+
phrases: phrases, words: words
|
272
281
|
end
|
273
282
|
alias_method :long_running_recognize, :process
|
274
283
|
alias_method :recognize_job, :process
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Copyright 2017 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/protobuf/duration_pb"
|
17
|
+
|
18
|
+
module Google
|
19
|
+
module Cloud
|
20
|
+
module Speech
|
21
|
+
##
|
22
|
+
# @private Helper module for converting Speech values.
|
23
|
+
module Convert
|
24
|
+
module ClassMethods
|
25
|
+
def number_to_duration number
|
26
|
+
return nil if number.nil?
|
27
|
+
|
28
|
+
Google::Protobuf::Duration.new \
|
29
|
+
seconds: number.to_i,
|
30
|
+
nanos: (number.remainder(1) * 1000000000).round
|
31
|
+
end
|
32
|
+
|
33
|
+
def duration_to_number duration
|
34
|
+
return nil if duration.nil?
|
35
|
+
|
36
|
+
return duration.seconds if duration.nanos == 0
|
37
|
+
|
38
|
+
duration.seconds + (duration.nanos / 1000000000.0)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
extend ClassMethods
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -266,6 +266,10 @@ module Google
|
|
266
266
|
# phrases "hints" so that the speech recognition is more likely to
|
267
267
|
# recognize them. See [usage
|
268
268
|
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
269
|
+
# @param [Boolean] words When `true`, return a list of words with
|
270
|
+
# additional information about each word. Currently, the only
|
271
|
+
# additional information provided is the start and end time
|
272
|
+
# offsets. See {Result#words}. Default is `false`.
|
269
273
|
#
|
270
274
|
# @return [Array<Result>] The transcribed text of audio recognized.
|
271
275
|
#
|
@@ -308,7 +312,8 @@ module Google
|
|
308
312
|
# max_alternatives: 10
|
309
313
|
#
|
310
314
|
def recognize source, encoding: nil, language: nil, sample_rate: nil,
|
311
|
-
max_alternatives: nil, profanity_filter: nil,
|
315
|
+
max_alternatives: nil, profanity_filter: nil,
|
316
|
+
phrases: nil, words: nil
|
312
317
|
ensure_service!
|
313
318
|
|
314
319
|
audio_obj = audio source, encoding: encoding, language: language,
|
@@ -317,7 +322,8 @@ module Google
|
|
317
322
|
config = audio_config(
|
318
323
|
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
319
324
|
language: audio_obj.language, max_alternatives: max_alternatives,
|
320
|
-
profanity_filter: profanity_filter, phrases: phrases
|
325
|
+
profanity_filter: profanity_filter, phrases: phrases,
|
326
|
+
words: words)
|
321
327
|
|
322
328
|
grpc = service.recognize_sync audio_obj.to_grpc, config
|
323
329
|
grpc.results.map do |result_grpc|
|
@@ -388,6 +394,10 @@ module Google
|
|
388
394
|
# phrases "hints" so that the speech recognition is more likely to
|
389
395
|
# recognize them. See [usage
|
390
396
|
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
397
|
+
# @param [Boolean] words When `true`, return a list of words with
|
398
|
+
# additional information about each word. Currently, the only
|
399
|
+
# additional information provided is the start and end time
|
400
|
+
# offsets. See {Result#words}. Default is `false`.
|
391
401
|
#
|
392
402
|
# @return [Operation] A resource represents the long-running,
|
393
403
|
# asynchronous processing of a speech-recognition operation.
|
@@ -440,7 +450,8 @@ module Google
|
|
440
450
|
# op.reload!
|
441
451
|
#
|
442
452
|
def process source, encoding: nil, sample_rate: nil, language: nil,
|
443
|
-
max_alternatives: nil, profanity_filter: nil, phrases: nil
|
453
|
+
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
454
|
+
words: nil
|
444
455
|
ensure_service!
|
445
456
|
|
446
457
|
audio_obj = audio source, encoding: encoding, language: language,
|
@@ -449,7 +460,8 @@ module Google
|
|
449
460
|
config = audio_config(
|
450
461
|
encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
|
451
462
|
language: audio_obj.language, max_alternatives: max_alternatives,
|
452
|
-
profanity_filter: profanity_filter, phrases: phrases
|
463
|
+
profanity_filter: profanity_filter, phrases: phrases,
|
464
|
+
words: words)
|
453
465
|
|
454
466
|
grpc = service.recognize_async audio_obj.to_grpc, config
|
455
467
|
Operation.from_grpc grpc
|
@@ -513,6 +525,10 @@ module Google
|
|
513
525
|
# phrases "hints" so that the speech recognition is more likely to
|
514
526
|
# recognize them. See [usage
|
515
527
|
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
528
|
+
# @param [Boolean] words When `true`, return a list of words with
|
529
|
+
# additional information about each word. Currently, the only
|
530
|
+
# additional information provided is the start and end time
|
531
|
+
# offsets. See {Result#words}. Default is `false`.
|
516
532
|
# @param [Boolean] utterance When `true`, the service will perform
|
517
533
|
# continuous recognition (continuing to process audio even if the user
|
518
534
|
# pauses speaking) until the client closes the output stream (gRPC
|
@@ -550,7 +566,7 @@ module Google
|
|
550
566
|
#
|
551
567
|
def stream encoding: nil, language: nil, sample_rate: nil,
|
552
568
|
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
553
|
-
utterance: nil, interim: nil
|
569
|
+
words: nil, utterance: nil, interim: nil
|
554
570
|
ensure_service!
|
555
571
|
|
556
572
|
grpc_req = V1::StreamingRecognizeRequest.new(
|
@@ -561,7 +577,7 @@ module Google
|
|
561
577
|
sample_rate: sample_rate,
|
562
578
|
max_alternatives: max_alternatives,
|
563
579
|
profanity_filter: profanity_filter,
|
564
|
-
phrases: phrases),
|
580
|
+
phrases: phrases, words: words),
|
565
581
|
single_utterance: utterance,
|
566
582
|
interim_results: interim
|
567
583
|
}.delete_if { |_, v| v.nil? }
|
@@ -608,7 +624,7 @@ module Google
|
|
608
624
|
|
609
625
|
def audio_config encoding: nil, language: nil, sample_rate: nil,
|
610
626
|
max_alternatives: nil, profanity_filter: nil,
|
611
|
-
phrases: nil
|
627
|
+
phrases: nil, words: nil
|
612
628
|
contexts = nil
|
613
629
|
contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
|
614
630
|
language = String(language) unless language.nil?
|
@@ -618,7 +634,8 @@ module Google
|
|
618
634
|
sample_rate_hertz: sample_rate,
|
619
635
|
max_alternatives: max_alternatives,
|
620
636
|
profanity_filter: profanity_filter,
|
621
|
-
speech_contexts: contexts
|
637
|
+
speech_contexts: contexts,
|
638
|
+
enable_word_time_offsets: words
|
622
639
|
}.delete_if { |_, v| v.nil? })
|
623
640
|
end
|
624
641
|
|
@@ -14,6 +14,7 @@
|
|
14
14
|
|
15
15
|
|
16
16
|
require "google/cloud/speech/v1"
|
17
|
+
require "google/cloud/speech/convert"
|
17
18
|
|
18
19
|
module Google
|
19
20
|
module Cloud
|
@@ -35,6 +36,10 @@ module Google
|
|
35
36
|
# recognition is correct. This field is typically provided only for the
|
36
37
|
# top hypothesis. A value of 0.0 is a sentinel value indicating
|
37
38
|
# confidence was not set.
|
39
|
+
# @attr_reader [Array<Result::Word>] words A list of words with additional
|
40
|
+
# information about each word. Currently, the only additional
|
41
|
+
# information provided is the start and end time offsets. Available
|
42
|
+
# when using the `words` argument in relevant methods.
|
38
43
|
# @attr_reader [Array<Result::Alternative>] alternatives Additional
|
39
44
|
# recognition hypotheses (up to the value specified in
|
40
45
|
# `max_alternatives`). The server may return fewer than
|
@@ -56,13 +61,14 @@ module Google
|
|
56
61
|
# result.confidence #=> 0.9826789498329163
|
57
62
|
#
|
58
63
|
class Result
|
59
|
-
attr_reader :transcript, :confidence, :alternatives
|
64
|
+
attr_reader :transcript, :confidence, :words, :alternatives
|
60
65
|
|
61
66
|
##
|
62
67
|
# @private Creates a new Result instance.
|
63
|
-
def initialize transcript, confidence, alternatives = []
|
64
|
-
@transcript
|
65
|
-
@confidence
|
68
|
+
def initialize transcript, confidence, words = [], alternatives = []
|
69
|
+
@transcript = transcript
|
70
|
+
@confidence = confidence
|
71
|
+
@words = words
|
66
72
|
@alternatives = alternatives
|
67
73
|
end
|
68
74
|
|
@@ -71,10 +77,42 @@ module Google
|
|
71
77
|
def self.from_grpc grpc
|
72
78
|
head, *tail = *grpc.alternatives
|
73
79
|
return nil if head.nil?
|
80
|
+
words = Array(head.words).map do |w|
|
81
|
+
Word.new w.word, Convert.duration_to_number(w.start_time),
|
82
|
+
Convert.duration_to_number(w.end_time)
|
83
|
+
end
|
74
84
|
alternatives = tail.map do |alt|
|
75
85
|
Alternative.new alt.transcript, alt.confidence
|
76
86
|
end
|
77
|
-
new head.transcript, head.confidence, alternatives
|
87
|
+
new head.transcript, head.confidence, words, alternatives
|
88
|
+
end
|
89
|
+
|
90
|
+
##
|
91
|
+
# Word-specific information for recognized words. Currently, the only
|
92
|
+
# additional information provided is the start and end time offsets.
|
93
|
+
# Available when using the `words` argument in relevant methods.
|
94
|
+
#
|
95
|
+
# @attr_reader [String] word The word corresponding to this set of
|
96
|
+
# information.
|
97
|
+
# @attr_reader [Numeric] start_time Time offset relative to the
|
98
|
+
# beginning of the audio, and corresponding to the start of the spoken
|
99
|
+
# word. This field is only set if `words` was specified. This is an
|
100
|
+
# experimental feature and the accuracy of the time offset can vary.
|
101
|
+
# @attr_reader [Numeric] end_time Time offset relative to the
|
102
|
+
# beginning of the audio, and corresponding to the end of the spoken
|
103
|
+
# word. This field is only set if `words` was specified. This is an
|
104
|
+
# experimental feature and the accuracy of the time offset can vary.
|
105
|
+
class Word
|
106
|
+
attr_reader :word, :start_time, :end_time
|
107
|
+
alias_method :to_str, :word
|
108
|
+
|
109
|
+
##
|
110
|
+
# @private Creates a new Result::Word instance.
|
111
|
+
def initialize word, start_time, end_time
|
112
|
+
@word = word
|
113
|
+
@start_time = start_time
|
114
|
+
@end_time = end_time
|
115
|
+
end
|
78
116
|
end
|
79
117
|
|
80
118
|
##
|
@@ -114,7 +152,7 @@ module Google
|
|
114
152
|
##
|
115
153
|
# @private Creates a new Result::Alternative instance.
|
116
154
|
def initialize transcript, confidence
|
117
|
-
@transcript
|
155
|
+
@transcript = transcript
|
118
156
|
@confidence = confidence
|
119
157
|
end
|
120
158
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# Copyright 2017, Google Inc. All rights reserved.
|
2
|
+
#
|
2
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
3
4
|
# you may not use this file except in compliance with the License.
|
4
5
|
# You may obtain a copy of the License at
|
@@ -11,4 +12,71 @@
|
|
11
12
|
# See the License for the specific language governing permissions and
|
12
13
|
# limitations under the License.
|
13
14
|
|
15
|
+
module Google
|
16
|
+
module Cloud
|
17
|
+
# rubocop:disable LineLength
|
18
|
+
|
19
|
+
##
|
20
|
+
# # Ruby Client for Google Cloud Speech API ([Alpha](https://github.com/GoogleCloudPlatform/google-cloud-ruby#versioning))
|
21
|
+
#
|
22
|
+
# [Google Cloud Speech API][Product Documentation]: Google Cloud Speech API.
|
23
|
+
# - [Product Documentation][]
|
24
|
+
#
|
25
|
+
# ## Quick Start
|
26
|
+
# In order to use this library, you first need to go through the following steps:
|
27
|
+
#
|
28
|
+
# 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
|
29
|
+
# 2. [Enable the Google Cloud Speech API.](https://console.cloud.google.com/apis/api/speech)
|
30
|
+
# 3. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
|
31
|
+
#
|
32
|
+
# ### Installation
|
33
|
+
# ```
|
34
|
+
# $ gem install google-cloud-speech
|
35
|
+
# ```
|
36
|
+
#
|
37
|
+
# ### Preview
|
38
|
+
# #### SpeechClient
|
39
|
+
# ```rb
|
40
|
+
# require "google/cloud/speech/v1"
|
41
|
+
#
|
42
|
+
# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
|
43
|
+
# language_code = "en-US"
|
44
|
+
# sample_rate_hertz = 44100
|
45
|
+
# encoding = :FLAC
|
46
|
+
# config = {
|
47
|
+
# language_code: language_code,
|
48
|
+
# sample_rate_hertz: sample_rate_hertz,
|
49
|
+
# encoding: encoding
|
50
|
+
# }
|
51
|
+
# uri = "gs://gapic-toolkit/hello.flac"
|
52
|
+
# audio = { uri: uri }
|
53
|
+
# response = speech_client.recognize(config, audio)
|
54
|
+
# ```
|
55
|
+
#
|
56
|
+
# ### Next Steps
|
57
|
+
# - Read the [Google Cloud Speech API Product documentation][Product Documentation] to learn more about the product and see How-to Guides.
|
58
|
+
# - View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/README.md) to see the full list of Cloud APIs that we cover.
|
59
|
+
#
|
60
|
+
# [Product Documentation]: https://cloud.google.com/speech
|
61
|
+
#
|
62
|
+
module Speech
|
63
|
+
# rubocop:enable LineLength
|
64
|
+
|
65
|
+
##
|
66
|
+
# # Google Cloud Speech API Contents
|
67
|
+
#
|
68
|
+
# | Class | Description |
|
69
|
+
# | ----- | ----------- |
|
70
|
+
# | [SpeechClient][] | Google Cloud Speech API. |
|
71
|
+
# | [Data Types][] | Data types for Google::Cloud::Speech::V1 |
|
72
|
+
#
|
73
|
+
# [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/speechclient
|
74
|
+
# [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/datatypes
|
75
|
+
#
|
76
|
+
module V1
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
14
82
|
require "google/cloud/speech/v1/speech_client"
|
@@ -36,6 +36,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
36
36
|
optional :max_alternatives, :int32, 4
|
37
37
|
optional :profanity_filter, :bool, 5
|
38
38
|
repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
|
39
|
+
optional :enable_word_time_offsets, :bool, 8
|
39
40
|
end
|
40
41
|
add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
|
41
42
|
value :ENCODING_UNSPECIFIED, 0
|
@@ -87,6 +88,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
87
88
|
add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
|
88
89
|
optional :transcript, :string, 1
|
89
90
|
optional :confidence, :float, 2
|
91
|
+
repeated :words, :message, 3, "google.cloud.speech.v1.WordInfo"
|
92
|
+
end
|
93
|
+
add_message "google.cloud.speech.v1.WordInfo" do
|
94
|
+
optional :start_time, :message, 1, "google.protobuf.Duration"
|
95
|
+
optional :end_time, :message, 2, "google.protobuf.Duration"
|
96
|
+
optional :word, :string, 3
|
90
97
|
end
|
91
98
|
end
|
92
99
|
|
@@ -110,6 +117,7 @@ module Google
|
|
110
117
|
StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
|
111
118
|
SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
|
112
119
|
SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
|
120
|
+
WordInfo = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.WordInfo").msgclass
|
113
121
|
end
|
114
122
|
end
|
115
123
|
end
|
@@ -56,7 +56,7 @@ module Google
|
|
56
56
|
# +audio_content+ data. The audio bytes must be encoded as specified in
|
57
57
|
# +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
|
58
58
|
# pure binary representation (not base64). See
|
59
|
-
#
|
59
|
+
# [audio limits](https://cloud.google.com/speech/limits#content).
|
60
60
|
class StreamingRecognizeRequest; end
|
61
61
|
|
62
62
|
# Provides information to the recognizer that specifies how to process the
|
@@ -101,9 +101,9 @@ module Google
|
|
101
101
|
# @!attribute [rw] language_code
|
102
102
|
# @return [String]
|
103
103
|
# *Required* The language of the supplied audio as a
|
104
|
-
#
|
104
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
|
105
105
|
# Example: "en-US".
|
106
|
-
# See
|
106
|
+
# See [Language Support](https://cloud.google.com/speech/docs/languages)
|
107
107
|
# for a list of the currently supported language codes.
|
108
108
|
# @!attribute [rw] max_alternatives
|
109
109
|
# @return [Integer]
|
@@ -122,11 +122,17 @@ module Google
|
|
122
122
|
# @!attribute [rw] speech_contexts
|
123
123
|
# @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
|
124
124
|
# *Optional* A means to provide context to assist the speech recognition.
|
125
|
+
# @!attribute [rw] enable_word_time_offsets
|
126
|
+
# @return [true, false]
|
127
|
+
# *Optional* If +true+, the top result includes a list of words and
|
128
|
+
# the start and end time offsets (timestamps) for those words. If
|
129
|
+
# +false+, no word-level time offset information is returned. The default is
|
130
|
+
# +false+.
|
125
131
|
class RecognitionConfig
|
126
132
|
# Audio encoding of the data sent in the audio message. All encodings support
|
127
|
-
# only 1 channel (mono) audio. Only +FLAC+
|
128
|
-
# the bytes of audio that follow the header. The other encodings
|
129
|
-
# audio bytes with no header.
|
133
|
+
# only 1 channel (mono) audio. Only +FLAC+ and +WAV+ include a header that
|
134
|
+
# describes the bytes of audio that follow the header. The other encodings
|
135
|
+
# are raw audio bytes with no header.
|
130
136
|
#
|
131
137
|
# For best results, the audio source should be captured and transmitted using
|
132
138
|
# a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
|
@@ -134,13 +140,13 @@ module Google
|
|
134
140
|
# this section, are used to capture or transmit the audio, particularly if
|
135
141
|
# background noise is present.
|
136
142
|
module AudioEncoding
|
137
|
-
# Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
|
143
|
+
# Not specified. Will return result {Google::Rpc::Code::INVALID_ARGUMENT}.
|
138
144
|
ENCODING_UNSPECIFIED = 0
|
139
145
|
|
140
146
|
# Uncompressed 16-bit signed little-endian samples (Linear PCM).
|
141
147
|
LINEAR16 = 1
|
142
148
|
|
143
|
-
#
|
149
|
+
# [+FLAC+](https://xiph.org/flac/documentation.html) (Free Lossless Audio
|
144
150
|
# Codec) is the recommended encoding because it is
|
145
151
|
# lossless--therefore recognition is not compromised--and
|
146
152
|
# requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
|
@@ -158,17 +164,17 @@ module Google
|
|
158
164
|
AMR_WB = 5
|
159
165
|
|
160
166
|
# Opus encoded audio frames in Ogg container
|
161
|
-
# (
|
167
|
+
# ([OggOpus](https://wiki.xiph.org/OggOpus)).
|
162
168
|
# +sample_rate_hertz+ must be 16000.
|
163
169
|
OGG_OPUS = 6
|
164
170
|
|
165
171
|
# Although the use of lossy encodings is not recommended, if a very low
|
166
172
|
# bitrate encoding is required, +OGG_OPUS+ is highly preferred over
|
167
|
-
# Speex encoding. The
|
173
|
+
# Speex encoding. The [Speex](https://speex.org/) encoding supported by
|
168
174
|
# Cloud Speech API has a header byte in each block, as in MIME type
|
169
175
|
# +audio/x-speex-with-header-byte+.
|
170
176
|
# It is a variant of the RTP Speex encoding defined in
|
171
|
-
#
|
177
|
+
# [RFC 5574](https://tools.ietf.org/html/rfc5574).
|
172
178
|
# The stream is a sequence of blocks, one block per RTP packet. Each block
|
173
179
|
# starts with a byte containing the length of the block, in bytes, followed
|
174
180
|
# by one or more frames of Speex data, padded to an integral number of
|
@@ -188,13 +194,13 @@ module Google
|
|
188
194
|
# to improve the accuracy for specific words and phrases, for example, if
|
189
195
|
# specific commands are typically spoken by the user. This can also be used
|
190
196
|
# to add additional words to the vocabulary of the recognizer. See
|
191
|
-
#
|
197
|
+
# [usage limits](https://cloud.google.com/speech/limits#content).
|
192
198
|
class SpeechContext; end
|
193
199
|
|
194
200
|
# Contains audio data in the encoding specified in the +RecognitionConfig+.
|
195
201
|
# Either +content+ or +uri+ must be supplied. Supplying both or neither
|
196
|
-
# returns Google::Rpc::Code::INVALID_ARGUMENT. See
|
197
|
-
#
|
202
|
+
# returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
|
203
|
+
# [audio limits](https://cloud.google.com/speech/limits#content).
|
198
204
|
# @!attribute [rw] content
|
199
205
|
# @return [String]
|
200
206
|
# The audio data bytes encoded as specified in
|
@@ -206,8 +212,8 @@ module Google
|
|
206
212
|
# +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
|
207
213
|
# supported, which must be specified in the following format:
|
208
214
|
# +gs://bucket_name/object_name+ (other URI formats return
|
209
|
-
# Google::Rpc::Code::INVALID_ARGUMENT). For more information, see
|
210
|
-
#
|
215
|
+
# {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
|
216
|
+
# [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
|
211
217
|
class RecognitionAudio; end
|
212
218
|
|
213
219
|
# The only message returned to the client by the +Recognize+ method. It
|
@@ -269,34 +275,32 @@ module Google
|
|
269
275
|
# 6. results { alternatives { transcript: " that is" } stability: 0.9 }
|
270
276
|
# results { alternatives { transcript: " the question" } stability: 0.01 }
|
271
277
|
#
|
272
|
-
# 7.
|
273
|
-
#
|
274
|
-
# 8. results { alternatives { transcript: " that is the question"
|
278
|
+
# 7. results { alternatives { transcript: " that is the question"
|
275
279
|
# confidence: 0.98 }
|
276
280
|
# alternatives { transcript: " that was the question" }
|
277
281
|
# is_final: true }
|
278
282
|
#
|
279
283
|
# Notes:
|
280
284
|
#
|
281
|
-
#
|
285
|
+
# * Only two of the above responses #4 and #7 contain final results; they are
|
282
286
|
# indicated by +is_final: true+. Concatenating these together generates the
|
283
287
|
# full transcript: "to be or not to be that is the question".
|
284
288
|
#
|
285
|
-
#
|
289
|
+
# * The others contain interim +results+. #3 and #6 contain two interim
|
286
290
|
# +results+: the first portion has a high stability and is less likely to
|
287
291
|
# change; the second portion has a low stability and is very likely to
|
288
292
|
# change. A UI designer might choose to show only high stability +results+.
|
289
293
|
#
|
290
|
-
#
|
294
|
+
# * The specific +stability+ and +confidence+ values shown above are only for
|
291
295
|
# illustrative purposes. Actual values may vary.
|
292
296
|
#
|
293
|
-
#
|
294
|
-
#
|
295
|
-
#
|
296
|
-
#
|
297
|
+
# * In each response, only one of these fields will be set:
|
298
|
+
# +error+,
|
299
|
+
# +speech_event_type+, or
|
300
|
+
# one or more (repeated) +results+.
|
297
301
|
# @!attribute [rw] error
|
298
302
|
# @return [Google::Rpc::Status]
|
299
|
-
# *Output-only* If set, returns a Google::Rpc::Status message that
|
303
|
+
# *Output-only* If set, returns a {Google::Rpc::Status} message that
|
300
304
|
# specifies the error for the operation.
|
301
305
|
# @!attribute [rw] results
|
302
306
|
# @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
|
@@ -351,6 +355,8 @@ module Google
|
|
351
355
|
# @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
|
352
356
|
# *Output-only* May contain one or more recognition hypotheses (up to the
|
353
357
|
# maximum specified in +max_alternatives+).
|
358
|
+
# These alternatives are ordered in terms of accuracy, with the top (first)
|
359
|
+
# alternative being the most probable, as ranked by the recognizer.
|
354
360
|
class SpeechRecognitionResult; end
|
355
361
|
|
356
362
|
# Alternative hypotheses (a.k.a. n-best list).
|
@@ -363,10 +369,36 @@ module Google
|
|
363
369
|
# indicates an estimated greater likelihood that the recognized words are
|
364
370
|
# correct. This field is typically provided only for the top hypothesis, and
|
365
371
|
# only for +is_final=true+ results. Clients should not rely on the
|
366
|
-
# +confidence+ field as it is not guaranteed to be accurate
|
367
|
-
# any of the results.
|
372
|
+
# +confidence+ field as it is not guaranteed to be accurate or consistent.
|
368
373
|
# The default of 0.0 is a sentinel value indicating +confidence+ was not set.
|
374
|
+
# @!attribute [rw] words
|
375
|
+
# @return [Array<Google::Cloud::Speech::V1::WordInfo>]
|
376
|
+
# *Output-only* A list of word-specific information for each recognized word.
|
369
377
|
class SpeechRecognitionAlternative; end
|
378
|
+
|
379
|
+
# Word-specific information for recognized words. Word information is only
|
380
|
+
# included in the response when certain request parameters are set, such
|
381
|
+
# as +enable_word_time_offsets+.
|
382
|
+
# @!attribute [rw] start_time
|
383
|
+
# @return [Google::Protobuf::Duration]
|
384
|
+
# *Output-only* Time offset relative to the beginning of the audio,
|
385
|
+
# and corresponding to the start of the spoken word.
|
386
|
+
# This field is only set if +enable_word_time_offsets=true+ and only
|
387
|
+
# in the top hypothesis.
|
388
|
+
# This is an experimental feature and the accuracy of the time offset can
|
389
|
+
# vary.
|
390
|
+
# @!attribute [rw] end_time
|
391
|
+
# @return [Google::Protobuf::Duration]
|
392
|
+
# *Output-only* Time offset relative to the beginning of the audio,
|
393
|
+
# and corresponding to the end of the spoken word.
|
394
|
+
# This field is only set if +enable_word_time_offsets=true+ and only
|
395
|
+
# in the top hypothesis.
|
396
|
+
# This is an experimental feature and the accuracy of the time offset can
|
397
|
+
# vary.
|
398
|
+
# @!attribute [rw] word
|
399
|
+
# @return [String]
|
400
|
+
# *Output-only* The word corresponding to this set of information.
|
401
|
+
class WordInfo; end
|
370
402
|
end
|
371
403
|
end
|
372
404
|
end
|
@@ -77,7 +77,7 @@ module Google
|
|
77
77
|
# If the embedded message type is well-known and has a custom JSON
|
78
78
|
# representation, that representation will be embedded adding a field
|
79
79
|
# +value+ which holds the custom JSON in addition to the +@type+
|
80
|
-
# field. Example (for message Google::Protobuf::Duration):
|
80
|
+
# field. Example (for message {Google::Protobuf::Duration}):
|
81
81
|
#
|
82
82
|
# {
|
83
83
|
# "@type": "type.googleapis.com/google.protobuf.Duration",
|
@@ -96,7 +96,7 @@ module Google
|
|
96
96
|
# qualified name of the type (as in +path/google.protobuf.Duration+).
|
97
97
|
# The name should be in a canonical form (e.g., leading "." is
|
98
98
|
# not accepted).
|
99
|
-
# * An HTTP GET on the URL must yield a Google::Protobuf::Type
|
99
|
+
# * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
|
100
100
|
# value in binary format, or produce an error.
|
101
101
|
# * Applications are allowed to cache lookup results based on the
|
102
102
|
# URL, or have them precompiled into a binary to avoid any
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Copyright 2017, Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Google
|
16
|
+
module Protobuf
|
17
|
+
# A Duration represents a signed, fixed-length span of time represented
|
18
|
+
# as a count of seconds and fractions of seconds at nanosecond
|
19
|
+
# resolution. It is independent of any calendar and concepts like "day"
|
20
|
+
# or "month". It is related to Timestamp in that the difference between
|
21
|
+
# two Timestamp values is a Duration and it can be added or subtracted
|
22
|
+
# from a Timestamp. Range is approximately +-10,000 years.
|
23
|
+
#
|
24
|
+
# Example 1: Compute Duration from two Timestamps in pseudo code.
|
25
|
+
#
|
26
|
+
# Timestamp start = ...;
|
27
|
+
# Timestamp end = ...;
|
28
|
+
# Duration duration = ...;
|
29
|
+
#
|
30
|
+
# duration.seconds = end.seconds - start.seconds;
|
31
|
+
# duration.nanos = end.nanos - start.nanos;
|
32
|
+
#
|
33
|
+
# if (duration.seconds < 0 && duration.nanos > 0) {
|
34
|
+
# duration.seconds += 1;
|
35
|
+
# duration.nanos -= 1000000000;
|
36
|
+
# } else if (duration.seconds > 0 && duration.nanos < 0) {
|
37
|
+
# duration.seconds -= 1;
|
38
|
+
# duration.nanos += 1000000000;
|
39
|
+
# }
|
40
|
+
#
|
41
|
+
# Example 2: Compute Timestamp from Timestamp + Duration in pseudo code.
|
42
|
+
#
|
43
|
+
# Timestamp start = ...;
|
44
|
+
# Duration duration = ...;
|
45
|
+
# Timestamp end = ...;
|
46
|
+
#
|
47
|
+
# end.seconds = start.seconds + duration.seconds;
|
48
|
+
# end.nanos = start.nanos + duration.nanos;
|
49
|
+
#
|
50
|
+
# if (end.nanos < 0) {
|
51
|
+
# end.seconds -= 1;
|
52
|
+
# end.nanos += 1000000000;
|
53
|
+
# } else if (end.nanos >= 1000000000) {
|
54
|
+
# end.seconds += 1;
|
55
|
+
# end.nanos -= 1000000000;
|
56
|
+
# }
|
57
|
+
#
|
58
|
+
# Example 3: Compute Duration from datetime.timedelta in Python.
|
59
|
+
#
|
60
|
+
# td = datetime.timedelta(days=3, minutes=10)
|
61
|
+
# duration = Duration()
|
62
|
+
# duration.FromTimedelta(td)
|
63
|
+
# @!attribute [rw] seconds
|
64
|
+
# @return [Integer]
|
65
|
+
# Signed seconds of the span of time. Must be from -315,576,000,000
|
66
|
+
# to +315,576,000,000 inclusive.
|
67
|
+
# @!attribute [rw] nanos
|
68
|
+
# @return [Integer]
|
69
|
+
# Signed fractions of a second at nanosecond resolution of the span
|
70
|
+
# of time. Durations less than one second are represented with a 0
|
71
|
+
# +seconds+ field and a positive or negative +nanos+ field. For durations
|
72
|
+
# of one second or more, a non-zero value for the +nanos+ field must be
|
73
|
+
# of the same sign as the +seconds+ field. Must be from -999,999,999
|
74
|
+
# to +999,999,999 inclusive.
|
75
|
+
class Duration; end
|
76
|
+
end
|
77
|
+
end
|
@@ -16,16 +16,16 @@ module Google
|
|
16
16
|
module Rpc
|
17
17
|
# The +Status+ type defines a logical error model that is suitable for different
|
18
18
|
# programming environments, including REST APIs and RPC APIs. It is used by
|
19
|
-
#
|
19
|
+
# [gRPC](https://github.com/grpc). The error model is designed to be:
|
20
20
|
#
|
21
|
-
#
|
22
|
-
#
|
21
|
+
# * Simple to use and understand for most users
|
22
|
+
# * Flexible enough to meet unexpected needs
|
23
23
|
#
|
24
24
|
# = Overview
|
25
25
|
#
|
26
26
|
# The +Status+ message contains three pieces of data: error code, error message,
|
27
27
|
# and error details. The error code should be an enum value of
|
28
|
-
# Google::Rpc::Code, but it may accept additional error codes if needed. The
|
28
|
+
# {Google::Rpc::Code}, but it may accept additional error codes if needed. The
|
29
29
|
# error message should be a developer-facing English message that helps
|
30
30
|
# developers *understand* and *resolve* the error. If a localized user-facing
|
31
31
|
# error message is needed, put the localized message in the error details or
|
@@ -49,31 +49,31 @@ module Google
|
|
49
49
|
#
|
50
50
|
# Example uses of this error model include:
|
51
51
|
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
52
|
+
# * Partial errors. If a service needs to return partial errors to the client,
|
53
|
+
# it may embed the +Status+ in the normal response to indicate the partial
|
54
|
+
# errors.
|
55
55
|
#
|
56
|
-
#
|
57
|
-
#
|
56
|
+
# * Workflow errors. A typical workflow has multiple steps. Each step may
|
57
|
+
# have a +Status+ message for error reporting purposes.
|
58
58
|
#
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
59
|
+
# * Batch operations. If a client uses batch request and batch response, the
|
60
|
+
# +Status+ message should be used directly inside batch response, one for
|
61
|
+
# each error sub-response.
|
62
62
|
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
63
|
+
# * Asynchronous operations. If an API call embeds asynchronous operation
|
64
|
+
# results in its response, the status of those operations should be
|
65
|
+
# represented directly using the +Status+ message.
|
66
66
|
#
|
67
|
-
#
|
68
|
-
#
|
67
|
+
# * Logging. If some API errors are stored in logs, the message +Status+ could
|
68
|
+
# be used directly after any stripping needed for security/privacy reasons.
|
69
69
|
# @!attribute [rw] code
|
70
70
|
# @return [Integer]
|
71
|
-
# The status code, which should be an enum value of Google::Rpc::Code.
|
71
|
+
# The status code, which should be an enum value of {Google::Rpc::Code}.
|
72
72
|
# @!attribute [rw] message
|
73
73
|
# @return [String]
|
74
74
|
# A developer-facing error message, which should be in English. Any
|
75
75
|
# user-facing error message should be localized and sent in the
|
76
|
-
# Google::Rpc::Status#details field, or localized by the client.
|
76
|
+
# {Google::Rpc::Status#details} field, or localized by the client.
|
77
77
|
# @!attribute [rw] details
|
78
78
|
# @return [Array<Google::Protobuf::Any>]
|
79
79
|
# A list of messages that carry the error details. There will be a
|
@@ -165,11 +165,15 @@ module Google
|
|
165
165
|
# Performs synchronous speech recognition: receive results after all audio
|
166
166
|
# has been sent and processed.
|
167
167
|
#
|
168
|
-
# @param config [Google::Cloud::Speech::V1::RecognitionConfig]
|
168
|
+
# @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
|
169
169
|
# *Required* Provides information to the recognizer that specifies how to
|
170
170
|
# process the request.
|
171
|
-
#
|
171
|
+
# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
|
172
|
+
# can also be provided.
|
173
|
+
# @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
|
172
174
|
# *Required* The audio data to be recognized.
|
175
|
+
# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
|
176
|
+
# can also be provided.
|
173
177
|
# @param options [Google::Gax::CallOptions]
|
174
178
|
# Overrides the default settings for this call, e.g., timeout,
|
175
179
|
# retries, etc.
|
@@ -178,32 +182,28 @@ module Google
|
|
178
182
|
# @example
|
179
183
|
# require "google/cloud/speech/v1"
|
180
184
|
#
|
181
|
-
#
|
182
|
-
#
|
183
|
-
# RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
|
184
|
-
# SpeechClient = Google::Cloud::Speech::V1::SpeechClient
|
185
|
-
#
|
186
|
-
# speech_client = SpeechClient.new
|
187
|
-
# encoding = AudioEncoding::FLAC
|
185
|
+
# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
|
186
|
+
# encoding = :FLAC
|
188
187
|
# sample_rate_hertz = 44100
|
189
188
|
# language_code = "en-US"
|
190
|
-
# config =
|
191
|
-
#
|
192
|
-
#
|
193
|
-
#
|
189
|
+
# config = {
|
190
|
+
# encoding: encoding,
|
191
|
+
# sample_rate_hertz: sample_rate_hertz,
|
192
|
+
# language_code: language_code
|
193
|
+
# }
|
194
194
|
# uri = "gs://bucket_name/file_name.flac"
|
195
|
-
# audio =
|
196
|
-
# audio.uri = uri
|
195
|
+
# audio = { uri: uri }
|
197
196
|
# response = speech_client.recognize(config, audio)
|
198
197
|
|
199
198
|
def recognize \
|
200
199
|
config,
|
201
200
|
audio,
|
202
201
|
options: nil
|
203
|
-
req =
|
202
|
+
req = {
|
204
203
|
config: config,
|
205
204
|
audio: audio
|
206
|
-
}.delete_if { |_, v| v.nil? }
|
205
|
+
}.delete_if { |_, v| v.nil? }
|
206
|
+
req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::RecognizeRequest)
|
207
207
|
@recognize.call(req, options)
|
208
208
|
end
|
209
209
|
|
@@ -212,11 +212,15 @@ module Google
|
|
212
212
|
# +Operation.error+ or an +Operation.response+ which contains
|
213
213
|
# a +LongRunningRecognizeResponse+ message.
|
214
214
|
#
|
215
|
-
# @param config [Google::Cloud::Speech::V1::RecognitionConfig]
|
215
|
+
# @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
|
216
216
|
# *Required* Provides information to the recognizer that specifies how to
|
217
217
|
# process the request.
|
218
|
-
#
|
218
|
+
# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
|
219
|
+
# can also be provided.
|
220
|
+
# @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
|
219
221
|
# *Required* The audio data to be recognized.
|
222
|
+
# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
|
223
|
+
# can also be provided.
|
220
224
|
# @param options [Google::Gax::CallOptions]
|
221
225
|
# Overrides the default settings for this call, e.g., timeout,
|
222
226
|
# retries, etc.
|
@@ -225,22 +229,17 @@ module Google
|
|
225
229
|
# @example
|
226
230
|
# require "google/cloud/speech/v1"
|
227
231
|
#
|
228
|
-
#
|
229
|
-
#
|
230
|
-
# RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
|
231
|
-
# SpeechClient = Google::Cloud::Speech::V1::SpeechClient
|
232
|
-
#
|
233
|
-
# speech_client = SpeechClient.new
|
234
|
-
# encoding = AudioEncoding::FLAC
|
232
|
+
# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
|
233
|
+
# encoding = :FLAC
|
235
234
|
# sample_rate_hertz = 44100
|
236
235
|
# language_code = "en-US"
|
237
|
-
# config =
|
238
|
-
#
|
239
|
-
#
|
240
|
-
#
|
236
|
+
# config = {
|
237
|
+
# encoding: encoding,
|
238
|
+
# sample_rate_hertz: sample_rate_hertz,
|
239
|
+
# language_code: language_code
|
240
|
+
# }
|
241
241
|
# uri = "gs://bucket_name/file_name.flac"
|
242
|
-
# audio =
|
243
|
-
# audio.uri = uri
|
242
|
+
# audio = { uri: uri }
|
244
243
|
#
|
245
244
|
# # Register a callback during the method call.
|
246
245
|
# operation = speech_client.long_running_recognize(config, audio) do |op|
|
@@ -273,10 +272,11 @@ module Google
|
|
273
272
|
config,
|
274
273
|
audio,
|
275
274
|
options: nil
|
276
|
-
req =
|
275
|
+
req = {
|
277
276
|
config: config,
|
278
277
|
audio: audio
|
279
|
-
}.delete_if { |_, v| v.nil? }
|
278
|
+
}.delete_if { |_, v| v.nil? }
|
279
|
+
req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::LongRunningRecognizeRequest)
|
280
280
|
operation = Google::Gax::Operation.new(
|
281
281
|
@long_running_recognize.call(req, options),
|
282
282
|
@operations_client,
|
@@ -309,11 +309,8 @@ module Google
|
|
309
309
|
# @example
|
310
310
|
# require "google/cloud/speech/v1"
|
311
311
|
#
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
# speech_client = SpeechClient.new
|
316
|
-
# request = StreamingRecognizeRequest.new
|
312
|
+
# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
|
313
|
+
# request = {}
|
317
314
|
# requests = [request]
|
318
315
|
# speech_client.streaming_recognize(requests).each do |element|
|
319
316
|
# # Process element.
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-cloud-speech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.25.0
|
4
|
+
version: 0.26.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Moore
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-07-
|
12
|
+
date: 2017-07-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: google-cloud-core
|
@@ -179,6 +179,7 @@ files:
|
|
179
179
|
- lib/google-cloud-speech.rb
|
180
180
|
- lib/google/cloud/speech.rb
|
181
181
|
- lib/google/cloud/speech/audio.rb
|
182
|
+
- lib/google/cloud/speech/convert.rb
|
182
183
|
- lib/google/cloud/speech/credentials.rb
|
183
184
|
- lib/google/cloud/speech/operation.rb
|
184
185
|
- lib/google/cloud/speech/project.rb
|
@@ -190,6 +191,7 @@ files:
|
|
190
191
|
- lib/google/cloud/speech/v1/cloud_speech_services_pb.rb
|
191
192
|
- lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
|
192
193
|
- lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
|
194
|
+
- lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
|
193
195
|
- lib/google/cloud/speech/v1/doc/google/rpc/status.rb
|
194
196
|
- lib/google/cloud/speech/v1/speech_client.rb
|
195
197
|
- lib/google/cloud/speech/v1/speech_client_config.json
|