google-cloud-speech 0.25.0 → 0.26.0
- checksums.yaml +4 -4
- data/lib/google/cloud/speech/audio.rb +13 -4
- data/lib/google/cloud/speech/convert.rb +46 -0
- data/lib/google/cloud/speech/project.rb +25 -8
- data/lib/google/cloud/speech/result.rb +44 -6
- data/lib/google/cloud/speech/v1.rb +68 -0
- data/lib/google/cloud/speech/v1/cloud_speech_pb.rb +8 -0
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +61 -29
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +2 -2
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +77 -0
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +19 -19
- data/lib/google/cloud/speech/v1/speech_client.rb +36 -39
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-metadata.gz:
-data.tar.gz:
+metadata.gz: 454b1310e5e39c2b4ac676c26b6aea8a89f452c5
+data.tar.gz: 12fa7c7129f20bdf92c5e61c3e1e294307937f1f
 SHA512:
-metadata.gz:
-data.tar.gz:
+metadata.gz: dca3bf30a3edad90e5dec095183a4a721491b2cef886ecc82fa4a83afe6fdc3c5e1387b336f9d0a96286b6605dc58cead675242e8d04338b2c44c17896c9b3fd
+data.tar.gz: 6ce650dbbeb8cccac3a3e63c290b191ff3c92436d65cb1a1887d6e3480ca4707dfd326017186f3da278f913e70057258d03469df503f9fcd0ce6c29a944e4fb5
data/lib/google/cloud/speech/audio.rb
CHANGED
@@ -191,6 +191,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Array<Result>] The transcribed text of audio recognized.
 #
@@ -209,14 +213,15 @@ module Google
 # result.transcript #=> "how old is the Brooklyn Bridge"
 # result.confidence #=> 0.9826789498329163
 #
-def recognize max_alternatives: nil, profanity_filter: nil,
+def recognize max_alternatives: nil, profanity_filter: nil,
+phrases: nil, words: nil
 ensure_speech!

 speech.recognize self, encoding: encoding, sample_rate: sample_rate,
 language: language,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-phrases: phrases
+phrases: phrases, words: words
 end

 ##
@@ -239,6 +244,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Operation] A resource represents the long-running,
 # asynchronous processing of a speech-recognition operation.
@@ -260,7 +269,7 @@ module Google
 # results = op.results
 #
 def process max_alternatives: nil, profanity_filter: nil,
-phrases: nil
+phrases: nil, words: nil
 ensure_speech!

 speech.process self, encoding: encoding,
@@ -268,7 +277,7 @@ module Google
 language: language,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-phrases: phrases
+phrases: phrases, words: words
 end
 alias_method :long_running_recognize, :process
 alias_method :recognize_job, :process
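Taken together, the audio.rb changes add an opt-in `words:` flag to Audio#recognize and Audio#process that surfaces per-word time offsets on the returned results. A minimal, illustrative sketch (assuming `audio` was built beforehand via Project#audio, as in the gem's own examples):

    # `audio` is a Google::Cloud::Speech::Audio built elsewhere, e.g. project.audio(...).
    results = audio.recognize words: true

    results.first.words.each do |word|
      # Each Result::Word carries the word plus start/end offsets in seconds.
      puts "#{word.word}: #{word.start_time} -> #{word.end_time}"
    end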
data/lib/google/cloud/speech/convert.rb
ADDED
@@ -0,0 +1,46 @@
+# Copyright 2017 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+require "google/protobuf/duration_pb"
+
+module Google
+module Cloud
+module Speech
+##
+# @private Helper module for converting Speech values.
+module Convert
+module ClassMethods
+def number_to_duration number
+return nil if number.nil?
+
+Google::Protobuf::Duration.new \
+seconds: number.to_i,
+nanos: (number.remainder(1) * 1000000000).round
+end
+
+def duration_to_number duration
+return nil if duration.nil?
+
+return duration.seconds if duration.nanos == 0
+
+duration.seconds + (duration.nanos / 1000000000.0)
+end
+end
+
+extend ClassMethods
+end
+end
+end
+end
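The new Convert helper (marked @private, so shown here only to illustrate the arithmetic) maps protobuf Duration values to plain Ruby numbers and back; the values below are chosen for illustration:

    require "google/cloud/speech/convert"

    # 1.5 seconds becomes Duration(seconds: 1, nanos: 500_000_000)...
    duration = Google::Cloud::Speech::Convert.number_to_duration 1.5
    duration.seconds #=> 1
    duration.nanos   #=> 500000000

    # ...and converts back to the Numeric used for Result::Word offsets.
    Google::Cloud::Speech::Convert.duration_to_number duration #=> 1.5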
data/lib/google/cloud/speech/project.rb
CHANGED
@@ -266,6 +266,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Array<Result>] The transcribed text of audio recognized.
 #
@@ -308,7 +312,8 @@ module Google
 # max_alternatives: 10
 #
 def recognize source, encoding: nil, language: nil, sample_rate: nil,
-max_alternatives: nil, profanity_filter: nil,
+max_alternatives: nil, profanity_filter: nil,
+phrases: nil, words: nil
 ensure_service!

 audio_obj = audio source, encoding: encoding, language: language,
@@ -317,7 +322,8 @@ module Google
 config = audio_config(
 encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
 language: audio_obj.language, max_alternatives: max_alternatives,
-profanity_filter: profanity_filter, phrases: phrases
+profanity_filter: profanity_filter, phrases: phrases,
+words: words)

 grpc = service.recognize_sync audio_obj.to_grpc, config
 grpc.results.map do |result_grpc|
@@ -388,6 +394,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Operation] A resource represents the long-running,
 # asynchronous processing of a speech-recognition operation.
@@ -440,7 +450,8 @@ module Google
 # op.reload!
 #
 def process source, encoding: nil, sample_rate: nil, language: nil,
-max_alternatives: nil, profanity_filter: nil, phrases: nil
+max_alternatives: nil, profanity_filter: nil, phrases: nil,
+words: nil
 ensure_service!

 audio_obj = audio source, encoding: encoding, language: language,
@@ -449,7 +460,8 @@ module Google
 config = audio_config(
 encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
 language: audio_obj.language, max_alternatives: max_alternatives,
-profanity_filter: profanity_filter, phrases: phrases
+profanity_filter: profanity_filter, phrases: phrases,
+words: words)

 grpc = service.recognize_async audio_obj.to_grpc, config
 Operation.from_grpc grpc
@@ -513,6 +525,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 # @param [Boolean] utterance When `true`, the service will perform
 # continuous recognition (continuing to process audio even if the user
 # pauses speaking) until the client closes the output stream (gRPC
@@ -550,7 +566,7 @@ module Google
 #
 def stream encoding: nil, language: nil, sample_rate: nil,
 max_alternatives: nil, profanity_filter: nil, phrases: nil,
-utterance: nil, interim: nil
+words: nil, utterance: nil, interim: nil
 ensure_service!

 grpc_req = V1::StreamingRecognizeRequest.new(
@@ -561,7 +577,7 @@ module Google
 sample_rate: sample_rate,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-phrases: phrases),
+phrases: phrases, words: words),
 single_utterance: utterance,
 interim_results: interim
 }.delete_if { |_, v| v.nil? }
@@ -608,7 +624,7 @@ module Google

 def audio_config encoding: nil, language: nil, sample_rate: nil,
 max_alternatives: nil, profanity_filter: nil,
-phrases: nil
+phrases: nil, words: nil
 contexts = nil
 contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
 language = String(language) unless language.nil?
@@ -618,7 +634,8 @@ module Google
 sample_rate_hertz: sample_rate,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-speech_contexts: contexts
+speech_contexts: contexts,
+enable_word_time_offsets: words
 }.delete_if { |_, v| v.nil? })
 end

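Inside Project#audio_config the `words:` flag is simply forwarded to the generated V1 config as `enable_word_time_offsets`. Roughly the message it assembles for `words: true` (the other field values here are placeholders):

    require "google/cloud/speech/v1"

    # nil entries are dropped by the delete_if before the message is built.
    config = Google::Cloud::Speech::V1::RecognitionConfig.new(
      encoding:                 :LINEAR16,
      sample_rate_hertz:        16000,
      language_code:            "en-US",
      enable_word_time_offsets: true
    )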
data/lib/google/cloud/speech/result.rb
CHANGED
@@ -14,6 +14,7 @@


 require "google/cloud/speech/v1"
+require "google/cloud/speech/convert"

 module Google
 module Cloud
@@ -35,6 +36,10 @@ module Google
 # recognition is correct. This field is typically provided only for the
 # top hypothesis. A value of 0.0 is a sentinel value indicating
 # confidence was not set.
+# @attr_reader [Array<Result::Word>] words A list of words with additional
+# information about each word. Currently, the only additional
+# information provided is the the start and end time offsets. Available
+# when using the `words` argument in relevant methods.
 # @attr_reader [Array<Result::Alternative>] alternatives Additional
 # recognition hypotheses (up to the value specified in
 # `max_alternatives`). The server may return fewer than
@@ -56,13 +61,14 @@ module Google
 # result.confidence #=> 0.9826789498329163
 #
 class Result
-attr_reader :transcript, :confidence, :alternatives
+attr_reader :transcript, :confidence, :words, :alternatives

 ##
 # @private Creates a new Results instance.
-def initialize transcript, confidence, alternatives = []
-@transcript
-@confidence
+def initialize transcript, confidence, words = [], alternatives = []
+@transcript = transcript
+@confidence = confidence
+@words = words
 @alternatives = alternatives
 end

@@ -71,10 +77,42 @@ module Google
 def self.from_grpc grpc
 head, *tail = *grpc.alternatives
 return nil if head.nil?
+words = Array(head.words).map do |w|
+Word.new w.word, Convert.duration_to_number(w.start_time),
+Convert.duration_to_number(w.end_time)
+end
 alternatives = tail.map do |alt|
 Alternative.new alt.transcript, alt.confidence
 end
-new head.transcript, head.confidence, alternatives
+new head.transcript, head.confidence, words, alternatives
+end
+
+##
+# Word-specific information for recognized words. Currently, the only
+# additional information provided is the the start and end time offsets.
+# Available when using the `words` argument in relevant methods.
+#
+# @attr_reader [String] word The word corresponding to this set of
+# information.
+# @attr_reader [Numeric] start_time Time offset relative to the
+# beginning of the audio, and corresponding to the start of the spoken
+# word. This field is only set if `words` was specified. This is an
+# experimental feature and the accuracy of the time offset can vary.
+# @attr_reader [Numeric] end_time Time offset relative to the
+# beginning of the audio, and corresponding to the end of the spoken
+# word. This field is only set if `words` was specified. This is an
+# experimental feature and the accuracy of the time offset can vary.
+class Word
+attr_reader :word, :start_time, :end_time
+alias_method :to_str, :word
+
+##
+# @private Creates a new Result::Word instance.
+def initialize word, start_time, end_time
+@word = word
+@start_time = start_time
+@end_time = end_time
+end
 end

 ##
@@ -114,7 +152,7 @@ module Google
 ##
 # @private Creates a new Result::Alternative instance.
 def initialize transcript, confidence
-@transcript
+@transcript = transcript
 @confidence = confidence
 end
 end
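Result#words and the new Result::Word value object are straightforward to consume; because Word aliases to_str to the underlying word, it also converts implicitly wherever a String is required. A small sketch against a hypothetical `result`:

    # `result` is a Google::Cloud::Speech::Result obtained with words: true.
    result.words.each do |word|
      puts format("%-12s %.2fs - %.2fs", word.word, word.start_time, word.end_time)
    end

    # to_str lets a Word stand in for a String, e.g. in String#+.
    label = "First word: " + result.words.first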
data/lib/google/cloud/speech/v1.rb
CHANGED
@@ -1,4 +1,5 @@
 # Copyright 2017, Google Inc. All rights reserved.
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,4 +12,71 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+module Google
+module Cloud
+# rubocop:disable LineLength
+
+##
+# # Ruby Client for Google Cloud Speech API ([Alpha](https://github.com/GoogleCloudPlatform/google-cloud-ruby#versioning))
+#
+# [Google Cloud Speech API][Product Documentation]: Google Cloud Speech API.
+# - [Product Documentation][]
+#
+# ## Quick Start
+# In order to use this library, you first need to go through the following steps:
+#
+# 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
+# 2. [Enable the Google Cloud Speech API.](https://console.cloud.google.com/apis/api/speech)
+# 3. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
+#
+# ### Installation
+# ```
+# $ gem install google-cloud-speech
+# ```
+#
+# ### Preview
+# #### SpeechClient
+# ```rb
+# require "google/cloud/speech/v1"
+#
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# language_code = "en-US"
+# sample_rate_hertz = 44100
+# encoding = :FLAC
+# config = {
+# language_code: language_code,
+# sample_rate_hertz: sample_rate_hertz,
+# encoding: encoding
+# }
+# uri = "gs://gapic-toolkit/hello.flac"
+# audio = { uri: uri }
+# response = speech_client.recognize(config, audio)
+# ```
+#
+# ### Next Steps
+# - Read the [Google Cloud Speech API Product documentation][Product Documentation] to learn more about the product and see How-to Guides.
+# - View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/README.md) to see the full list of Cloud APIs that we cover.
+#
+# [Product Documentation]: https://cloud.google.com/speech
+#
+module Speech
+# rubocop:enable LineLength
+
+##
+# # Google Cloud Speech API Contents
+#
+# | Class | Description |
+# | ----- | ----------- |
+# | [SpeechClient][] | Google Cloud Speech API. |
+# | [Data Types][] | Data types for Google::Cloud::Speech::V1 |
+#
+# [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/speechclient
+# [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/datatypes
+#
+module V1
+end
+end
+end
+end
+
 require "google/cloud/speech/v1/speech_client"
data/lib/google/cloud/speech/v1/cloud_speech_pb.rb
CHANGED
@@ -36,6 +36,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
 optional :max_alternatives, :int32, 4
 optional :profanity_filter, :bool, 5
 repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
+optional :enable_word_time_offsets, :bool, 8
 end
 add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
 value :ENCODING_UNSPECIFIED, 0
@@ -87,6 +88,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
 add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
 optional :transcript, :string, 1
 optional :confidence, :float, 2
+repeated :words, :message, 3, "google.cloud.speech.v1.WordInfo"
+end
+add_message "google.cloud.speech.v1.WordInfo" do
+optional :start_time, :message, 1, "google.protobuf.Duration"
+optional :end_time, :message, 2, "google.protobuf.Duration"
+optional :word, :string, 3
 end
 end

@@ -110,6 +117,7 @@ module Google
 StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
 SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
 SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
+WordInfo = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.WordInfo").msgclass
 end
 end
 end
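With the regenerated descriptors, WordInfo is an ordinary protobuf message whose offsets are google.protobuf.Duration values, which is exactly what Convert#duration_to_number unwraps. A hand-built, purely illustrative instance:

    require "google/protobuf/duration_pb"
    require "google/cloud/speech/v1/cloud_speech_pb"

    word_info = Google::Cloud::Speech::V1::WordInfo.new(
      word:       "bridge",
      start_time: Google::Protobuf::Duration.new(seconds: 1, nanos: 200_000_000),
      end_time:   Google::Protobuf::Duration.new(seconds: 1, nanos: 900_000_000)
    )
    word_info.start_time.seconds #=> 1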
data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
CHANGED
@@ -56,7 +56,7 @@ module Google
 # +audio_content+ data. The audio bytes must be encoded as specified in
 # +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
 # pure binary representation (not base64). See
-#
+# [audio limits](https://cloud.google.com/speech/limits#content).
 class StreamingRecognizeRequest; end

 # Provides information to the recognizer that specifies how to process the
@@ -101,9 +101,9 @@ module Google
 # @!attribute [rw] language_code
 # @return [String]
 # *Required* The language of the supplied audio as a
-#
+# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
 # Example: "en-US".
-# See
+# See [Language Support](https://cloud.google.com/speech/docs/languages)
 # for a list of the currently supported language codes.
 # @!attribute [rw] max_alternatives
 # @return [Integer]
@@ -122,11 +122,17 @@ module Google
 # @!attribute [rw] speech_contexts
 # @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
 # *Optional* A means to provide context to assist the speech recognition.
+# @!attribute [rw] enable_word_time_offsets
+# @return [true, false]
+# *Optional* If +true+, the top result includes a list of words and
+# the start and end time offsets (timestamps) for those words. If
+# +false+, no word-level time offset information is returned. The default is
+# +false+.
 class RecognitionConfig
 # Audio encoding of the data sent in the audio message. All encodings support
-# only 1 channel (mono) audio. Only +FLAC+
-# the bytes of audio that follow the header. The other encodings
-# audio bytes with no header.
+# only 1 channel (mono) audio. Only +FLAC+ and +WAV+ include a header that
+# describes the bytes of audio that follow the header. The other encodings
+# are raw audio bytes with no header.
 #
 # For best results, the audio source should be captured and transmitted using
 # a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
@@ -134,13 +140,13 @@ module Google
 # this section, are used to capture or transmit the audio, particularly if
 # background noise is present.
 module AudioEncoding
-# Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
+# Not specified. Will return result {Google::Rpc::Code::INVALID_ARGUMENT}.
 ENCODING_UNSPECIFIED = 0

 # Uncompressed 16-bit signed little-endian samples (Linear PCM).
 LINEAR16 = 1

-#
+# [+FLAC+](https://xiph.org/flac/documentation.html) (Free Lossless Audio
 # Codec) is the recommended encoding because it is
 # lossless--therefore recognition is not compromised--and
 # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
@@ -158,17 +164,17 @@ module Google
 AMR_WB = 5

 # Opus encoded audio frames in Ogg container
-# (
+# ([OggOpus](https://wiki.xiph.org/OggOpus)).
 # +sample_rate_hertz+ must be 16000.
 OGG_OPUS = 6

 # Although the use of lossy encodings is not recommended, if a very low
 # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
-# Speex encoding. The
+# Speex encoding. The [Speex](https://speex.org/) encoding supported by
 # Cloud Speech API has a header byte in each block, as in MIME type
 # +audio/x-speex-with-header-byte+.
 # It is a variant of the RTP Speex encoding defined in
-#
+# [RFC 5574](https://tools.ietf.org/html/rfc5574).
 # The stream is a sequence of blocks, one block per RTP packet. Each block
 # starts with a byte containing the length of the block, in bytes, followed
 # by one or more frames of Speex data, padded to an integral number of
@@ -188,13 +194,13 @@ module Google
 # to improve the accuracy for specific words and phrases, for example, if
 # specific commands are typically spoken by the user. This can also be used
 # to add additional words to the vocabulary of the recognizer. See
-#
+# [usage limits](https://cloud.google.com/speech/limits#content).
 class SpeechContext; end

 # Contains audio data in the encoding specified in the +RecognitionConfig+.
 # Either +content+ or +uri+ must be supplied. Supplying both or neither
-# returns Google::Rpc::Code::INVALID_ARGUMENT. See
-#
+# returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
+# [audio limits](https://cloud.google.com/speech/limits#content).
 # @!attribute [rw] content
 # @return [String]
 # The audio data bytes encoded as specified in
@@ -206,8 +212,8 @@ module Google
 # +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
 # supported, which must be specified in the following format:
 # +gs://bucket_name/object_name+ (other URI formats return
-# Google::Rpc::Code::INVALID_ARGUMENT). For more information, see
-#
+# {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
+# [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
 class RecognitionAudio; end

 # The only message returned to the client by the +Recognize+ method. It
@@ -269,34 +275,32 @@ module Google
 # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
 # results { alternatives { transcript: " the question" } stability: 0.01 }
 #
-# 7.
-#
-# 8. results { alternatives { transcript: " that is the question"
+# 7. results { alternatives { transcript: " that is the question"
 # confidence: 0.98 }
 # alternatives { transcript: " that was the question" }
 # is_final: true }
 #
 # Notes:
 #
-#
+# * Only two of the above responses #4 and #7 contain final results; they are
 # indicated by +is_final: true+. Concatenating these together generates the
 # full transcript: "to be or not to be that is the question".
 #
-#
+# * The others contain interim +results+. #3 and #6 contain two interim
 # +results+: the first portion has a high stability and is less likely to
 # change; the second portion has a low stability and is very likely to
 # change. A UI designer might choose to show only high stability +results+.
 #
-#
+# * The specific +stability+ and +confidence+ values shown above are only for
 # illustrative purposes. Actual values may vary.
 #
-#
-#
-#
-#
+# * In each response, only one of these fields will be set:
+# +error+,
+# +speech_event_type+, or
+# one or more (repeated) +results+.
 # @!attribute [rw] error
 # @return [Google::Rpc::Status]
-# *Output-only* If set, returns a Google::Rpc::Status message that
+# *Output-only* If set, returns a {Google::Rpc::Status} message that
 # specifies the error for the operation.
 # @!attribute [rw] results
 # @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
@@ -351,6 +355,8 @@ module Google
 # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
 # *Output-only* May contain one or more recognition hypotheses (up to the
 # maximum specified in +max_alternatives+).
+# These alternatives are ordered in terms of accuracy, with the top (first)
+# alternative being the most probable, as ranked by the recognizer.
 class SpeechRecognitionResult; end

 # Alternative hypotheses (a.k.a. n-best list).
@@ -363,10 +369,36 @@ module Google
 # indicates an estimated greater likelihood that the recognized words are
 # correct. This field is typically provided only for the top hypothesis, and
 # only for +is_final=true+ results. Clients should not rely on the
-# +confidence+ field as it is not guaranteed to be accurate
-# any of the results.
+# +confidence+ field as it is not guaranteed to be accurate or consistent.
 # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
+# @!attribute [rw] words
+# @return [Array<Google::Cloud::Speech::V1::WordInfo>]
+# *Output-only* A list of word-specific information for each recognized word.
 class SpeechRecognitionAlternative; end
+
+# Word-specific information for recognized words. Word information is only
+# included in the response when certain request parameters are set, such
+# as +enable_word_time_offsets+.
+# @!attribute [rw] start_time
+# @return [Google::Protobuf::Duration]
+# *Output-only* Time offset relative to the beginning of the audio,
+# and corresponding to the start of the spoken word.
+# This field is only set if +enable_word_time_offsets=true+ and only
+# in the top hypothesis.
+# This is an experimental feature and the accuracy of the time offset can
+# vary.
+# @!attribute [rw] end_time
+# @return [Google::Protobuf::Duration]
+# *Output-only* Time offset relative to the beginning of the audio,
+# and corresponding to the end of the spoken word.
+# This field is only set if +enable_word_time_offsets=true+ and only
+# in the top hypothesis.
+# This is an experimental feature and the accuracy of the time offset can
+# vary.
+# @!attribute [rw] word
+# @return [String]
+# *Output-only* The word corresponding to this set of information.
+class WordInfo; end
 end
 end
 end
data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
CHANGED
@@ -77,7 +77,7 @@ module Google
 # If the embedded message type is well-known and has a custom JSON
 # representation, that representation will be embedded adding a field
 # +value+ which holds the custom JSON in addition to the +@type+
-# field. Example (for message Google::Protobuf::Duration):
+# field. Example (for message {Google::Protobuf::Duration}):
 #
 # {
 # "@type": "type.googleapis.com/google.protobuf.Duration",
@@ -96,7 +96,7 @@ module Google
 # qualified name of the type (as in +path/google.protobuf.Duration+).
 # The name should be in a canonical form (e.g., leading "." is
 # not accepted).
-# * An HTTP GET on the URL must yield a Google::Protobuf::Type
+# * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
 # value in binary format, or produce an error.
 # * Applications are allowed to cache lookup results based on the
 # URL, or have them precompiled into a binary to avoid any
data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
ADDED
@@ -0,0 +1,77 @@
+# Copyright 2017, Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module Google
+module Protobuf
+# A Duration represents a signed, fixed-length span of time represented
+# as a count of seconds and fractions of seconds at nanosecond
+# resolution. It is independent of any calendar and concepts like "day"
+# or "month". It is related to Timestamp in that the difference between
+# two Timestamp values is a Duration and it can be added or subtracted
+# from a Timestamp. Range is approximately +-10,000 years.
+#
+# Example 1: Compute Duration from two Timestamps in pseudo code.
+#
+# Timestamp start = ...;
+# Timestamp end = ...;
+# Duration duration = ...;
+#
+# duration.seconds = end.seconds - start.seconds;
+# duration.nanos = end.nanos - start.nanos;
+#
+# if (duration.seconds < 0 && duration.nanos > 0) {
+# duration.seconds += 1;
+# duration.nanos -= 1000000000;
+# } else if (durations.seconds > 0 && duration.nanos < 0) {
+# duration.seconds -= 1;
+# duration.nanos += 1000000000;
+# }
+#
+# Example 2: Compute Timestamp from Timestamp + Duration in pseudo code.
+#
+# Timestamp start = ...;
+# Duration duration = ...;
+# Timestamp end = ...;
+#
+# end.seconds = start.seconds + duration.seconds;
+# end.nanos = start.nanos + duration.nanos;
+#
+# if (end.nanos < 0) {
+# end.seconds -= 1;
+# end.nanos += 1000000000;
+# } else if (end.nanos >= 1000000000) {
+# end.seconds += 1;
+# end.nanos -= 1000000000;
+# }
+#
+# Example 3: Compute Duration from datetime.timedelta in Python.
+#
+# td = datetime.timedelta(days=3, minutes=10)
+# duration = Duration()
+# duration.FromTimedelta(td)
+# @!attribute [rw] seconds
+# @return [Integer]
+# Signed seconds of the span of time. Must be from -315,576,000,000
+# to +315,576,000,000 inclusive.
+# @!attribute [rw] nanos
+# @return [Integer]
+# Signed fractions of a second at nanosecond resolution of the span
+# of time. Durations less than one second are represented with a 0
+# +seconds+ field and a positive or negative +nanos+ field. For durations
+# of one second or more, a non-zero value for the +nanos+ field must be
+# of the same sign as the +seconds+ field. Must be from -999,999,999
+# to +999,999,999 inclusive.
+class Duration; end
+end
+end
data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb
CHANGED
@@ -16,16 +16,16 @@ module Google
 module Rpc
 # The +Status+ type defines a logical error model that is suitable for different
 # programming environments, including REST APIs and RPC APIs. It is used by
-#
+# [gRPC](https://github.com/grpc). The error model is designed to be:
 #
-#
-#
+# * Simple to use and understand for most users
+# * Flexible enough to meet unexpected needs
 #
 # = Overview
 #
 # The +Status+ message contains three pieces of data: error code, error message,
 # and error details. The error code should be an enum value of
-# Google::Rpc::Code, but it may accept additional error codes if needed. The
+# {Google::Rpc::Code}, but it may accept additional error codes if needed. The
 # error message should be a developer-facing English message that helps
 # developers *understand* and *resolve* the error. If a localized user-facing
 # error message is needed, put the localized message in the error details or
@@ -49,31 +49,31 @@ module Google
 #
 # Example uses of this error model include:
 #
-#
-#
-#
+# * Partial errors. If a service needs to return partial errors to the client,
+# it may embed the +Status+ in the normal response to indicate the partial
+# errors.
 #
-#
-#
+# * Workflow errors. A typical workflow has multiple steps. Each step may
+# have a +Status+ message for error reporting purpose.
 #
-#
-#
-#
+# * Batch operations. If a client uses batch request and batch response, the
+# +Status+ message should be used directly inside batch response, one for
+# each error sub-response.
 #
-#
-#
-#
+# * Asynchronous operations. If an API call embeds asynchronous operation
+# results in its response, the status of those operations should be
+# represented directly using the +Status+ message.
 #
-#
-#
+# * Logging. If some API errors are stored in logs, the message +Status+ could
+# be used directly after any stripping needed for security/privacy reasons.
 # @!attribute [rw] code
 # @return [Integer]
-# The status code, which should be an enum value of Google::Rpc::Code.
+# The status code, which should be an enum value of {Google::Rpc::Code}.
 # @!attribute [rw] message
 # @return [String]
 # A developer-facing error message, which should be in English. Any
 # user-facing error message should be localized and sent in the
-# Google::Rpc::Status#details field, or localized by the client.
+# {Google::Rpc::Status#details} field, or localized by the client.
 # @!attribute [rw] details
 # @return [Array<Google::Protobuf::Any>]
 # A list of messages that carry the error details. There will be a
data/lib/google/cloud/speech/v1/speech_client.rb
CHANGED
@@ -165,11 +165,15 @@ module Google
 # Performs synchronous speech recognition: receive results after all audio
 # has been sent and processed.
 #
-# @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+# @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
 # *Required* Provides information to the recognizer that specifies how to
 # process the request.
-#
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+# can also be provided.
+# @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
 # *Required* The audio data to be recognized.
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+# can also be provided.
 # @param options [Google::Gax::CallOptions]
 # Overrides the default settings for this call, e.g, timeout,
 # retries, etc.
@@ -178,32 +182,28 @@ module Google
 # @example
 # require "google/cloud/speech/v1"
 #
-#
-#
-# RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
-# SpeechClient = Google::Cloud::Speech::V1::SpeechClient
-#
-# speech_client = SpeechClient.new
-# encoding = AudioEncoding::FLAC
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# encoding = :FLAC
 # sample_rate_hertz = 44100
 # language_code = "en-US"
-# config =
-#
-#
-#
+# config = {
+# encoding: encoding,
+# sample_rate_hertz: sample_rate_hertz,
+# language_code: language_code
+# }
 # uri = "gs://bucket_name/file_name.flac"
-# audio =
-# audio.uri = uri
+# audio = { uri: uri }
 # response = speech_client.recognize(config, audio)

 def recognize \
 config,
 audio,
 options: nil
-req =
+req = {
 config: config,
 audio: audio
-}.delete_if { |_, v| v.nil? }
+}.delete_if { |_, v| v.nil? }
+req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::RecognizeRequest)
 @recognize.call(req, options)
 end

@@ -212,11 +212,15 @@ module Google
 # +Operation.error+ or an +Operation.response+ which contains
 # a +LongRunningRecognizeResponse+ message.
 #
-# @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+# @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
 # *Required* Provides information to the recognizer that specifies how to
 # process the request.
-#
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+# can also be provided.
+# @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
 # *Required* The audio data to be recognized.
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+# can also be provided.
 # @param options [Google::Gax::CallOptions]
 # Overrides the default settings for this call, e.g, timeout,
 # retries, etc.
@@ -225,22 +229,17 @@ module Google
 # @example
 # require "google/cloud/speech/v1"
 #
-#
-#
-# RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
-# SpeechClient = Google::Cloud::Speech::V1::SpeechClient
-#
-# speech_client = SpeechClient.new
-# encoding = AudioEncoding::FLAC
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# encoding = :FLAC
 # sample_rate_hertz = 44100
 # language_code = "en-US"
-# config =
-#
-#
-#
+# config = {
+# encoding: encoding,
+# sample_rate_hertz: sample_rate_hertz,
+# language_code: language_code
+# }
 # uri = "gs://bucket_name/file_name.flac"
-# audio =
-# audio.uri = uri
+# audio = { uri: uri }
 #
 # # Register a callback during the method call.
 # operation = speech_client.long_running_recognize(config, audio) do |op|
@@ -273,10 +272,11 @@ module Google
 config,
 audio,
 options: nil
-req =
+req = {
 config: config,
 audio: audio
-}.delete_if { |_, v| v.nil? }
+}.delete_if { |_, v| v.nil? }
+req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::LongRunningRecognizeRequest)
 operation = Google::Gax::Operation.new(
 @long_running_recognize.call(req, options),
 @operations_client,
@@ -309,11 +309,8 @@ module Google
 # @example
 # require "google/cloud/speech/v1"
 #
-#
-#
-#
-# speech_client = SpeechClient.new
-# request = StreamingRecognizeRequest.new
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# request = {}
 # requests = [request]
 # speech_client.streaming_recognize(requests).each do |element|
 # # Process element.
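At the generated-client layer the updated examples pass plain hashes, which Google::Gax::to_proto now coerces into the request messages, so the new enable_word_time_offsets field can be requested the same way. A hedged sketch (bucket and object names are placeholders):

    require "google/cloud/speech/v1"

    speech_client = Google::Cloud::Speech::V1::SpeechClient.new
    config = {
      encoding:                 :FLAC,
      sample_rate_hertz:        44100,
      language_code:            "en-US",
      enable_word_time_offsets: true # ask for per-word start/end offsets
    }
    audio = { uri: "gs://bucket_name/file_name.flac" }

    response = speech_client.recognize(config, audio)
    response.results.each do |result|
      result.alternatives.first.words.each do |word_info|
        puts "#{word_info.word} starts at #{word_info.start_time.seconds}s"
      end
    end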
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: google-cloud-speech
 version: !ruby/object:Gem::Version
-version: 0.
+version: 0.26.0
 platform: ruby
 authors:
 - Mike Moore
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-07-
+date: 2017-07-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: google-cloud-core
@@ -179,6 +179,7 @@ files:
 - lib/google-cloud-speech.rb
 - lib/google/cloud/speech.rb
 - lib/google/cloud/speech/audio.rb
+- lib/google/cloud/speech/convert.rb
 - lib/google/cloud/speech/credentials.rb
 - lib/google/cloud/speech/operation.rb
 - lib/google/cloud/speech/project.rb
@@ -190,6 +191,7 @@ files:
 - lib/google/cloud/speech/v1/cloud_speech_services_pb.rb
 - lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
 - lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
+- lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
 - lib/google/cloud/speech/v1/doc/google/rpc/status.rb
 - lib/google/cloud/speech/v1/speech_client.rb
 - lib/google/cloud/speech/v1/speech_client_config.json