google-cloud-speech 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: ccecf5215ecbd7fd0e0ef074b7638711905b8450
- data.tar.gz: f4c270117ad7bc3611e435fb598b5dd6bcbc835c
+ metadata.gz: 454b1310e5e39c2b4ac676c26b6aea8a89f452c5
+ data.tar.gz: 12fa7c7129f20bdf92c5e61c3e1e294307937f1f
  SHA512:
- metadata.gz: d034fa22e135e05a08e14b09ffc2cb075a5cd7757f717a7635bfbd551539d853935670664c21cd636f1b2f2bd68da0d769f2747c8f7b0002b79628d49360197e
- data.tar.gz: 5b3101e327dbb801a311abc8cc0dcaf2ec3774cce69c22f8bd629fff62701bc9ab7b8717ef4b923dc0c95d0d4d3efb32642fd876d0d0dbe91cb4d99a0ed0fb0d
+ metadata.gz: dca3bf30a3edad90e5dec095183a4a721491b2cef886ecc82fa4a83afe6fdc3c5e1387b336f9d0a96286b6605dc58cead675242e8d04338b2c44c17896c9b3fd
+ data.tar.gz: 6ce650dbbeb8cccac3a3e63c290b191ff3c92436d65cb1a1887d6e3480ca4707dfd326017186f3da278f913e70057258d03469df503f9fcd0ce6c29a944e4fb5
@@ -191,6 +191,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Array<Result>] The transcribed text of audio recognized.
  #
@@ -209,14 +213,15 @@ module Google
  # result.transcript #=> "how old is the Brooklyn Bridge"
  # result.confidence #=> 0.9826789498329163
  #
- def recognize max_alternatives: nil, profanity_filter: nil, phrases: nil
+ def recognize max_alternatives: nil, profanity_filter: nil,
+ phrases: nil, words: nil
  ensure_speech!

  speech.recognize self, encoding: encoding, sample_rate: sample_rate,
  language: language,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases
+ phrases: phrases, words: words
  end

  ##
@@ -239,6 +244,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Operation] A resource represents the long-running,
  # asynchronous processing of a speech-recognition operation.
@@ -260,7 +269,7 @@ module Google
  # results = op.results
  #
  def process max_alternatives: nil, profanity_filter: nil,
- phrases: nil
+ phrases: nil, words: nil
  ensure_speech!

  speech.process self, encoding: encoding,
@@ -268,7 +277,7 @@ module Google
  language: language,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases
+ phrases: phrases, words: words
  end
  alias_method :long_running_recognize, :process
  alias_method :recognize_job, :process
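For context, a minimal sketch of how the new `words` argument might be used from the handwritten client, based only on the documentation added above; the file path and audio settings are hypothetical placeholders:

```ruby
require "google/cloud/speech"

speech = Google::Cloud::Speech.new
audio  = speech.audio "path/to/audio.raw", encoding: :linear16,
                      language: "en-US", sample_rate: 16000

# Ask for word-level time offsets along with the transcript.
results = audio.recognize words: true
results.first.words.each do |word|
  puts "#{word.word}: #{word.start_time}s - #{word.end_time}s"
end
```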
@@ -0,0 +1,46 @@
+ # Copyright 2017 Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "google/protobuf/duration_pb"
+
+ module Google
+ module Cloud
+ module Speech
+ ##
+ # @private Helper module for converting Speech values.
+ module Convert
+ module ClassMethods
+ def number_to_duration number
+ return nil if number.nil?
+
+ Google::Protobuf::Duration.new \
+ seconds: number.to_i,
+ nanos: (number.remainder(1) * 1000000000).round
+ end
+
+ def duration_to_number duration
+ return nil if duration.nil?
+
+ return duration.seconds if duration.nanos == 0
+
+ duration.seconds + (duration.nanos / 1000000000.0)
+ end
+ end
+
+ extend ClassMethods
+ end
+ end
+ end
+ end
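A quick illustration of the conversion helpers added above (note the module is marked `@private`, so this round-trip is shown only to explain the behavior, not as public API):

```ruby
require "google/cloud/speech/convert"

# A plain Numeric becomes a protobuf Duration...
duration = Google::Cloud::Speech::Convert.number_to_duration 1.4
duration.seconds #=> 1
duration.nanos   #=> 400000000

# ...and a Duration converts back to a plain Numeric.
Google::Cloud::Speech::Convert.duration_to_number duration #=> 1.4
```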
@@ -266,6 +266,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Array<Result>] The transcribed text of audio recognized.
  #
@@ -308,7 +312,8 @@ module Google
  # max_alternatives: 10
  #
  def recognize source, encoding: nil, language: nil, sample_rate: nil,
- max_alternatives: nil, profanity_filter: nil, phrases: nil
+ max_alternatives: nil, profanity_filter: nil,
+ phrases: nil, words: nil
  ensure_service!

  audio_obj = audio source, encoding: encoding, language: language,
@@ -317,7 +322,8 @@ module Google
  config = audio_config(
  encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
  language: audio_obj.language, max_alternatives: max_alternatives,
- profanity_filter: profanity_filter, phrases: phrases)
+ profanity_filter: profanity_filter, phrases: phrases,
+ words: words)

  grpc = service.recognize_sync audio_obj.to_grpc, config
  grpc.results.map do |result_grpc|
@@ -388,6 +394,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Operation] A resource represents the long-running,
  # asynchronous processing of a speech-recognition operation.
@@ -440,7 +450,8 @@ module Google
  # op.reload!
  #
  def process source, encoding: nil, sample_rate: nil, language: nil,
- max_alternatives: nil, profanity_filter: nil, phrases: nil
+ max_alternatives: nil, profanity_filter: nil, phrases: nil,
+ words: nil
  ensure_service!

  audio_obj = audio source, encoding: encoding, language: language,
@@ -449,7 +460,8 @@ module Google
  config = audio_config(
  encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
  language: audio_obj.language, max_alternatives: max_alternatives,
- profanity_filter: profanity_filter, phrases: phrases)
+ profanity_filter: profanity_filter, phrases: phrases,
+ words: words)

  grpc = service.recognize_async audio_obj.to_grpc, config
  Operation.from_grpc grpc
@@ -513,6 +525,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  # @param [Boolean] utterance When `true`, the service will perform
  # continuous recognition (continuing to process audio even if the user
  # pauses speaking) until the client closes the output stream (gRPC
@@ -550,7 +566,7 @@ module Google
  #
  def stream encoding: nil, language: nil, sample_rate: nil,
  max_alternatives: nil, profanity_filter: nil, phrases: nil,
- utterance: nil, interim: nil
+ words: nil, utterance: nil, interim: nil
  ensure_service!

  grpc_req = V1::StreamingRecognizeRequest.new(
@@ -561,7 +577,7 @@ module Google
  sample_rate: sample_rate,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases),
+ phrases: phrases, words: words),
  single_utterance: utterance,
  interim_results: interim
  }.delete_if { |_, v| v.nil? }
@@ -608,7 +624,7 @@ module Google

  def audio_config encoding: nil, language: nil, sample_rate: nil,
  max_alternatives: nil, profanity_filter: nil,
- phrases: nil
+ phrases: nil, words: nil
  contexts = nil
  contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
  language = String(language) unless language.nil?
@@ -618,7 +634,8 @@ module Google
  sample_rate_hertz: sample_rate,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- speech_contexts: contexts
+ speech_contexts: contexts,
+ enable_word_time_offsets: words
  }.delete_if { |_, v| v.nil? })
  end

@@ -14,6 +14,7 @@


  require "google/cloud/speech/v1"
+ require "google/cloud/speech/convert"

  module Google
  module Cloud
@@ -35,6 +36,10 @@ module Google
  # recognition is correct. This field is typically provided only for the
  # top hypothesis. A value of 0.0 is a sentinel value indicating
  # confidence was not set.
+ # @attr_reader [Array<Result::Word>] words A list of words with additional
+ # information about each word. Currently, the only additional
+ # information provided is the the start and end time offsets. Available
+ # when using the `words` argument in relevant methods.
  # @attr_reader [Array<Result::Alternative>] alternatives Additional
  # recognition hypotheses (up to the value specified in
  # `max_alternatives`). The server may return fewer than
@@ -56,13 +61,14 @@ module Google
  # result.confidence #=> 0.9826789498329163
  #
  class Result
- attr_reader :transcript, :confidence, :alternatives
+ attr_reader :transcript, :confidence, :words, :alternatives

  ##
  # @private Creates a new Results instance.
- def initialize transcript, confidence, alternatives = []
- @transcript = transcript
- @confidence = confidence
+ def initialize transcript, confidence, words = [], alternatives = []
+ @transcript = transcript
+ @confidence = confidence
+ @words = words
  @alternatives = alternatives
  end

@@ -71,10 +77,42 @@ module Google
  def self.from_grpc grpc
  head, *tail = *grpc.alternatives
  return nil if head.nil?
+ words = Array(head.words).map do |w|
+ Word.new w.word, Convert.duration_to_number(w.start_time),
+ Convert.duration_to_number(w.end_time)
+ end
  alternatives = tail.map do |alt|
  Alternative.new alt.transcript, alt.confidence
  end
- new head.transcript, head.confidence, alternatives
+ new head.transcript, head.confidence, words, alternatives
+ end
+
+ ##
+ # Word-specific information for recognized words. Currently, the only
+ # additional information provided is the the start and end time offsets.
+ # Available when using the `words` argument in relevant methods.
+ #
+ # @attr_reader [String] word The word corresponding to this set of
+ # information.
+ # @attr_reader [Numeric] start_time Time offset relative to the
+ # beginning of the audio, and corresponding to the start of the spoken
+ # word. This field is only set if `words` was specified. This is an
+ # experimental feature and the accuracy of the time offset can vary.
+ # @attr_reader [Numeric] end_time Time offset relative to the
+ # beginning of the audio, and corresponding to the end of the spoken
+ # word. This field is only set if `words` was specified. This is an
+ # experimental feature and the accuracy of the time offset can vary.
+ class Word
+ attr_reader :word, :start_time, :end_time
+ alias_method :to_str, :word
+
+ ##
+ # @private Creates a new Result::Word instance.
+ def initialize word, start_time, end_time
+ @word = word
+ @start_time = start_time
+ @end_time = end_time
+ end
  end

  ##
@@ -114,7 +152,7 @@ module Google
  ##
  # @private Creates a new Result::Alternative instance.
  def initialize transcript, confidence
- @transcript = transcript
+ @transcript = transcript
  @confidence = confidence
  end
  end
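To make the new `Result::Word` value object concrete, a small illustration; the constructor is `@private`, so in practice these objects come back from `Result#words`, and the values here are invented:

```ruby
require "google/cloud/speech"

word = Google::Cloud::Speech::Result::Word.new "Brooklyn", 1.0, 1.3

word.word       #=> "Brooklyn"
word.start_time #=> 1.0
word.end_time   #=> 1.3

# Because to_str is aliased to word, a Word converts implicitly to a String.
"How old is the " + word #=> "How old is the Brooklyn"
```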
@@ -1,4 +1,5 @@
  # Copyright 2017, Google Inc. All rights reserved.
+ #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
@@ -11,4 +12,71 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ module Google
+ module Cloud
+ # rubocop:disable LineLength
+
+ ##
+ # # Ruby Client for Google Cloud Speech API ([Alpha](https://github.com/GoogleCloudPlatform/google-cloud-ruby#versioning))
+ #
+ # [Google Cloud Speech API][Product Documentation]: Google Cloud Speech API.
+ # - [Product Documentation][]
+ #
+ # ## Quick Start
+ # In order to use this library, you first need to go through the following steps:
+ #
+ # 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
+ # 2. [Enable the Google Cloud Speech API.](https://console.cloud.google.com/apis/api/speech)
+ # 3. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
+ #
+ # ### Installation
+ # ```
+ # $ gem install google-cloud-speech
+ # ```
+ #
+ # ### Preview
+ # #### SpeechClient
+ # ```rb
+ # require "google/cloud/speech/v1"
+ #
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # language_code = "en-US"
+ # sample_rate_hertz = 44100
+ # encoding = :FLAC
+ # config = {
+ # language_code: language_code,
+ # sample_rate_hertz: sample_rate_hertz,
+ # encoding: encoding
+ # }
+ # uri = "gs://gapic-toolkit/hello.flac"
+ # audio = { uri: uri }
+ # response = speech_client.recognize(config, audio)
+ # ```
+ #
+ # ### Next Steps
+ # - Read the [Google Cloud Speech API Product documentation][Product Documentation] to learn more about the product and see How-to Guides.
+ # - View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/README.md) to see the full list of Cloud APIs that we cover.
+ #
+ # [Product Documentation]: https://cloud.google.com/speech
+ #
+ module Speech
+ # rubocop:enable LineLength
+
+ ##
+ # # Google Cloud Speech API Contents
+ #
+ # | Class | Description |
+ # | ----- | ----------- |
+ # | [SpeechClient][] | Google Cloud Speech API. |
+ # | [Data Types][] | Data types for Google::Cloud::Speech::V1 |
+ #
+ # [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/speechclient
+ # [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/datatypes
+ #
+ module V1
+ end
+ end
+ end
+ end
+
  require "google/cloud/speech/v1/speech_client"
@@ -36,6 +36,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
  optional :max_alternatives, :int32, 4
  optional :profanity_filter, :bool, 5
  repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
+ optional :enable_word_time_offsets, :bool, 8
  end
  add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
  value :ENCODING_UNSPECIFIED, 0
@@ -87,6 +88,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
  add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
  optional :transcript, :string, 1
  optional :confidence, :float, 2
+ repeated :words, :message, 3, "google.cloud.speech.v1.WordInfo"
+ end
+ add_message "google.cloud.speech.v1.WordInfo" do
+ optional :start_time, :message, 1, "google.protobuf.Duration"
+ optional :end_time, :message, 2, "google.protobuf.Duration"
+ optional :word, :string, 3
  end
  end

@@ -110,6 +117,7 @@ module Google
  StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
  SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
  SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
+ WordInfo = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.WordInfo").msgclass
  end
  end
  end
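A brief sketch of the newly registered `WordInfo` message class in use, assuming the V1 protos are loaded through the usual `google/cloud/speech/v1` entry point; the timings are invented:

```ruby
require "google/cloud/speech/v1"

info = Google::Cloud::Speech::V1::WordInfo.new(
  word: "bridge",
  start_time: Google::Protobuf::Duration.new(seconds: 2, nanos: 100_000_000),
  end_time:   Google::Protobuf::Duration.new(seconds: 2, nanos: 600_000_000)
)

info.word               #=> "bridge"
info.start_time.seconds #=> 2
info.start_time.nanos   #=> 100000000
```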
@@ -56,7 +56,7 @@ module Google
  # +audio_content+ data. The audio bytes must be encoded as specified in
  # +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
  # pure binary representation (not base64). See
- # {audio limits}[https://cloud.google.com/speech/limits#content].
+ # [audio limits](https://cloud.google.com/speech/limits#content).
  class StreamingRecognizeRequest; end

  # Provides information to the recognizer that specifies how to process the
@@ -101,9 +101,9 @@ module Google
  # @!attribute [rw] language_code
  # @return [String]
  # *Required* The language of the supplied audio as a
- # {BCP-47}[https://www.rfc-editor.org/rfc/bcp/bcp47.txt] language tag.
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  # Example: "en-US".
- # See {Language Support}[https://cloud.google.com/speech/docs/languages]
+ # See [Language Support](https://cloud.google.com/speech/docs/languages)
  # for a list of the currently supported language codes.
  # @!attribute [rw] max_alternatives
  # @return [Integer]
@@ -122,11 +122,17 @@ module Google
  # @!attribute [rw] speech_contexts
  # @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
  # *Optional* A means to provide context to assist the speech recognition.
+ # @!attribute [rw] enable_word_time_offsets
+ # @return [true, false]
+ # *Optional* If +true+, the top result includes a list of words and
+ # the start and end time offsets (timestamps) for those words. If
+ # +false+, no word-level time offset information is returned. The default is
+ # +false+.
  class RecognitionConfig
  # Audio encoding of the data sent in the audio message. All encodings support
- # only 1 channel (mono) audio. Only +FLAC+ includes a header that describes
- # the bytes of audio that follow the header. The other encodings are raw
- # audio bytes with no header.
+ # only 1 channel (mono) audio. Only +FLAC+ and +WAV+ include a header that
+ # describes the bytes of audio that follow the header. The other encodings
+ # are raw audio bytes with no header.
  #
  # For best results, the audio source should be captured and transmitted using
  # a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
@@ -134,13 +140,13 @@ module Google
  # this section, are used to capture or transmit the audio, particularly if
  # background noise is present.
  module AudioEncoding
- # Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
+ # Not specified. Will return result {Google::Rpc::Code::INVALID_ARGUMENT}.
  ENCODING_UNSPECIFIED = 0

  # Uncompressed 16-bit signed little-endian samples (Linear PCM).
  LINEAR16 = 1

- # {+FLAC+}[https://xiph.org/flac/documentation.html] (Free Lossless Audio
+ # [+FLAC+](https://xiph.org/flac/documentation.html) (Free Lossless Audio
  # Codec) is the recommended encoding because it is
  # lossless--therefore recognition is not compromised--and
  # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
@@ -158,17 +164,17 @@ module Google
  AMR_WB = 5

  # Opus encoded audio frames in Ogg container
- # ({OggOpus}[https://wiki.xiph.org/OggOpus]).
+ # ([OggOpus](https://wiki.xiph.org/OggOpus)).
  # +sample_rate_hertz+ must be 16000.
  OGG_OPUS = 6

  # Although the use of lossy encodings is not recommended, if a very low
  # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
- # Speex encoding. The {Speex}[https://speex.org/] encoding supported by
+ # Speex encoding. The [Speex](https://speex.org/) encoding supported by
  # Cloud Speech API has a header byte in each block, as in MIME type
  # +audio/x-speex-with-header-byte+.
  # It is a variant of the RTP Speex encoding defined in
- # {RFC 5574}[https://tools.ietf.org/html/rfc5574].
+ # [RFC 5574](https://tools.ietf.org/html/rfc5574).
  # The stream is a sequence of blocks, one block per RTP packet. Each block
  # starts with a byte containing the length of the block, in bytes, followed
  # by one or more frames of Speex data, padded to an integral number of
@@ -188,13 +194,13 @@ module Google
  # to improve the accuracy for specific words and phrases, for example, if
  # specific commands are typically spoken by the user. This can also be used
  # to add additional words to the vocabulary of the recognizer. See
- # {usage limits}[https://cloud.google.com/speech/limits#content].
+ # [usage limits](https://cloud.google.com/speech/limits#content).
  class SpeechContext; end

  # Contains audio data in the encoding specified in the +RecognitionConfig+.
  # Either +content+ or +uri+ must be supplied. Supplying both or neither
- # returns Google::Rpc::Code::INVALID_ARGUMENT. See
- # {audio limits}[https://cloud.google.com/speech/limits#content].
+ # returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
+ # [audio limits](https://cloud.google.com/speech/limits#content).
  # @!attribute [rw] content
  # @return [String]
  # The audio data bytes encoded as specified in
@@ -206,8 +212,8 @@ module Google
  # +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
  # supported, which must be specified in the following format:
  # +gs://bucket_name/object_name+ (other URI formats return
- # Google::Rpc::Code::INVALID_ARGUMENT). For more information, see
- # {Request URIs}[https://cloud.google.com/storage/docs/reference-uris].
+ # {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
+ # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
  class RecognitionAudio; end

  # The only message returned to the client by the +Recognize+ method. It
@@ -269,34 +275,32 @@ module Google
  # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
  # results { alternatives { transcript: " the question" } stability: 0.01 }
  #
- # 7. speech_event_type: END_OF_SINGLE_UTTERANCE
- #
- # 8. results { alternatives { transcript: " that is the question"
+ # 7. results { alternatives { transcript: " that is the question"
  # confidence: 0.98 }
  # alternatives { transcript: " that was the question" }
  # is_final: true }
  #
  # Notes:
  #
- # - Only two of the above responses #4 and #8 contain final results; they are
+ # * Only two of the above responses #4 and #7 contain final results; they are
  # indicated by +is_final: true+. Concatenating these together generates the
  # full transcript: "to be or not to be that is the question".
  #
- # - The others contain interim +results+. #3 and #6 contain two interim
+ # * The others contain interim +results+. #3 and #6 contain two interim
  # +results+: the first portion has a high stability and is less likely to
  # change; the second portion has a low stability and is very likely to
  # change. A UI designer might choose to show only high stability +results+.
  #
- # - The specific +stability+ and +confidence+ values shown above are only for
+ # * The specific +stability+ and +confidence+ values shown above are only for
  # illustrative purposes. Actual values may vary.
  #
- # - In each response, only one of these fields will be set:
- # +error+,
- # +speech_event_type+, or
- # one or more (repeated) +results+.
+ # * In each response, only one of these fields will be set:
+ # +error+,
+ # +speech_event_type+, or
+ # one or more (repeated) +results+.
  # @!attribute [rw] error
  # @return [Google::Rpc::Status]
- # *Output-only* If set, returns a Google::Rpc::Status message that
+ # *Output-only* If set, returns a {Google::Rpc::Status} message that
  # specifies the error for the operation.
  # @!attribute [rw] results
  # @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
@@ -351,6 +355,8 @@ module Google
  # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
  # *Output-only* May contain one or more recognition hypotheses (up to the
  # maximum specified in +max_alternatives+).
+ # These alternatives are ordered in terms of accuracy, with the top (first)
+ # alternative being the most probable, as ranked by the recognizer.
  class SpeechRecognitionResult; end

  # Alternative hypotheses (a.k.a. n-best list).
@@ -363,10 +369,36 @@ module Google
  # indicates an estimated greater likelihood that the recognized words are
  # correct. This field is typically provided only for the top hypothesis, and
  # only for +is_final=true+ results. Clients should not rely on the
- # +confidence+ field as it is not guaranteed to be accurate, or even set, in
- # any of the results.
+ # +confidence+ field as it is not guaranteed to be accurate or consistent.
  # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
+ # @!attribute [rw] words
+ # @return [Array<Google::Cloud::Speech::V1::WordInfo>]
+ # *Output-only* A list of word-specific information for each recognized word.
  class SpeechRecognitionAlternative; end
+
+ # Word-specific information for recognized words. Word information is only
+ # included in the response when certain request parameters are set, such
+ # as +enable_word_time_offsets+.
+ # @!attribute [rw] start_time
+ # @return [Google::Protobuf::Duration]
+ # *Output-only* Time offset relative to the beginning of the audio,
+ # and corresponding to the start of the spoken word.
+ # This field is only set if +enable_word_time_offsets=true+ and only
+ # in the top hypothesis.
+ # This is an experimental feature and the accuracy of the time offset can
+ # vary.
+ # @!attribute [rw] end_time
+ # @return [Google::Protobuf::Duration]
+ # *Output-only* Time offset relative to the beginning of the audio,
+ # and corresponding to the end of the spoken word.
+ # This field is only set if +enable_word_time_offsets=true+ and only
+ # in the top hypothesis.
+ # This is an experimental feature and the accuracy of the time offset can
+ # vary.
+ # @!attribute [rw] word
+ # @return [String]
+ # *Output-only* The word corresponding to this set of information.
+ class WordInfo; end
  end
  end
  end
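Putting the new config field and +WordInfo+ message together, a hedged end-to-end sketch against the low-level V1 client; the bucket URI is the same placeholder used in the examples above:

```ruby
require "google/cloud/speech/v1"

speech_client = Google::Cloud::Speech::V1::SpeechClient.new
config = {
  encoding: :FLAC,
  sample_rate_hertz: 44100,
  language_code: "en-US",
  enable_word_time_offsets: true
}
audio = { uri: "gs://bucket_name/file_name.flac" }

response = speech_client.recognize config, audio
response.results.each do |result|
  result.alternatives.first.words.each do |info|
    start = info.start_time.seconds + info.start_time.nanos / 1e9
    puts "#{info.word} starts at #{start}s"
  end
end
```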
@@ -77,7 +77,7 @@ module Google
  # If the embedded message type is well-known and has a custom JSON
  # representation, that representation will be embedded adding a field
  # +value+ which holds the custom JSON in addition to the +@type+
- # field. Example (for message Google::Protobuf::Duration):
+ # field. Example (for message {Google::Protobuf::Duration}):
  #
  # {
  # "@type": "type.googleapis.com/google.protobuf.Duration",
@@ -96,7 +96,7 @@ module Google
  # qualified name of the type (as in +path/google.protobuf.Duration+).
  # The name should be in a canonical form (e.g., leading "." is
  # not accepted).
- # * An HTTP GET on the URL must yield a Google::Protobuf::Type
+ # * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
  # value in binary format, or produce an error.
  # * Applications are allowed to cache lookup results based on the
  # URL, or have them precompiled into a binary to avoid any
@@ -0,0 +1,77 @@
+ # Copyright 2017, Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ module Google
+ module Protobuf
+ # A Duration represents a signed, fixed-length span of time represented
+ # as a count of seconds and fractions of seconds at nanosecond
+ # resolution. It is independent of any calendar and concepts like "day"
+ # or "month". It is related to Timestamp in that the difference between
+ # two Timestamp values is a Duration and it can be added or subtracted
+ # from a Timestamp. Range is approximately +-10,000 years.
+ #
+ # Example 1: Compute Duration from two Timestamps in pseudo code.
+ #
+ # Timestamp start = ...;
+ # Timestamp end = ...;
+ # Duration duration = ...;
+ #
+ # duration.seconds = end.seconds - start.seconds;
+ # duration.nanos = end.nanos - start.nanos;
+ #
+ # if (duration.seconds < 0 && duration.nanos > 0) {
+ # duration.seconds += 1;
+ # duration.nanos -= 1000000000;
+ # } else if (durations.seconds > 0 && duration.nanos < 0) {
+ # duration.seconds -= 1;
+ # duration.nanos += 1000000000;
+ # }
+ #
+ # Example 2: Compute Timestamp from Timestamp + Duration in pseudo code.
+ #
+ # Timestamp start = ...;
+ # Duration duration = ...;
+ # Timestamp end = ...;
+ #
+ # end.seconds = start.seconds + duration.seconds;
+ # end.nanos = start.nanos + duration.nanos;
+ #
+ # if (end.nanos < 0) {
+ # end.seconds -= 1;
+ # end.nanos += 1000000000;
+ # } else if (end.nanos >= 1000000000) {
+ # end.seconds += 1;
+ # end.nanos -= 1000000000;
+ # }
+ #
+ # Example 3: Compute Duration from datetime.timedelta in Python.
+ #
+ # td = datetime.timedelta(days=3, minutes=10)
+ # duration = Duration()
+ # duration.FromTimedelta(td)
+ # @!attribute [rw] seconds
+ # @return [Integer]
+ # Signed seconds of the span of time. Must be from -315,576,000,000
+ # to +315,576,000,000 inclusive.
+ # @!attribute [rw] nanos
+ # @return [Integer]
+ # Signed fractions of a second at nanosecond resolution of the span
+ # of time. Durations less than one second are represented with a 0
+ # +seconds+ field and a positive or negative +nanos+ field. For durations
+ # of one second or more, a non-zero value for the +nanos+ field must be
+ # of the same sign as the +seconds+ field. Must be from -999,999,999
+ # to +999,999,999 inclusive.
+ class Duration; end
+ end
+ end
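The normalization rules described in the comment above can also be expressed compactly in Ruby; this is purely an illustrative helper, not part of the gem:

```ruby
# Split a raw (seconds, nanos) pair into a normalized Duration-style pair
# where both parts carry the same sign and |nanos| < 1_000_000_000.
def normalize_duration seconds, nanos
  total = seconds * 1_000_000_000 + nanos
  sign  = total.negative? ? -1 : 1
  secs, ns = total.abs.divmod 1_000_000_000
  [sign * secs, sign * ns]
end

normalize_duration 1, -250_000_000  #=> [0, 750000000]
normalize_duration 0, 1_500_000_000 #=> [1, 500000000]
```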
@@ -16,16 +16,16 @@ module Google
  module Rpc
  # The +Status+ type defines a logical error model that is suitable for different
  # programming environments, including REST APIs and RPC APIs. It is used by
- # {gRPC}[https://github.com/grpc]. The error model is designed to be:
+ # [gRPC](https://github.com/grpc). The error model is designed to be:
  #
- # - Simple to use and understand for most users
- # - Flexible enough to meet unexpected needs
+ # * Simple to use and understand for most users
+ # * Flexible enough to meet unexpected needs
  #
  # = Overview
  #
  # The +Status+ message contains three pieces of data: error code, error message,
  # and error details. The error code should be an enum value of
- # Google::Rpc::Code, but it may accept additional error codes if needed. The
+ # {Google::Rpc::Code}, but it may accept additional error codes if needed. The
  # error message should be a developer-facing English message that helps
  # developers *understand* and *resolve* the error. If a localized user-facing
  # error message is needed, put the localized message in the error details or
@@ -49,31 +49,31 @@ module Google
  #
  # Example uses of this error model include:
  #
- # - Partial errors. If a service needs to return partial errors to the client,
- # it may embed the +Status+ in the normal response to indicate the partial
- # errors.
+ # * Partial errors. If a service needs to return partial errors to the client,
+ # it may embed the +Status+ in the normal response to indicate the partial
+ # errors.
  #
- # - Workflow errors. A typical workflow has multiple steps. Each step may
- # have a +Status+ message for error reporting purpose.
+ # * Workflow errors. A typical workflow has multiple steps. Each step may
+ # have a +Status+ message for error reporting purpose.
  #
- # - Batch operations. If a client uses batch request and batch response, the
- # +Status+ message should be used directly inside batch response, one for
- # each error sub-response.
+ # * Batch operations. If a client uses batch request and batch response, the
+ # +Status+ message should be used directly inside batch response, one for
+ # each error sub-response.
  #
- # - Asynchronous operations. If an API call embeds asynchronous operation
- # results in its response, the status of those operations should be
- # represented directly using the +Status+ message.
+ # * Asynchronous operations. If an API call embeds asynchronous operation
+ # results in its response, the status of those operations should be
+ # represented directly using the +Status+ message.
  #
- # - Logging. If some API errors are stored in logs, the message +Status+ could
- # be used directly after any stripping needed for security/privacy reasons.
+ # * Logging. If some API errors are stored in logs, the message +Status+ could
+ # be used directly after any stripping needed for security/privacy reasons.
  # @!attribute [rw] code
  # @return [Integer]
- # The status code, which should be an enum value of Google::Rpc::Code.
+ # The status code, which should be an enum value of {Google::Rpc::Code}.
  # @!attribute [rw] message
  # @return [String]
  # A developer-facing error message, which should be in English. Any
  # user-facing error message should be localized and sent in the
- # Google::Rpc::Status#details field, or localized by the client.
+ # {Google::Rpc::Status#details} field, or localized by the client.
  # @!attribute [rw] details
  # @return [Array<Google::Protobuf::Any>]
  # A list of messages that carry the error details. There will be a
@@ -165,11 +165,15 @@ module Google
  # Performs synchronous speech recognition: receive results after all audio
  # has been sent and processed.
  #
- # @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+ # @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
  # *Required* Provides information to the recognizer that specifies how to
  # process the request.
- # @param audio [Google::Cloud::Speech::V1::RecognitionAudio]
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+ # can also be provided.
+ # @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
  # *Required* The audio data to be recognized.
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+ # can also be provided.
  # @param options [Google::Gax::CallOptions]
  # Overrides the default settings for this call, e.g, timeout,
  # retries, etc.
@@ -178,32 +182,28 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # AudioEncoding = Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding
- # RecognitionAudio = Google::Cloud::Speech::V1::RecognitionAudio
- # RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- #
- # speech_client = SpeechClient.new
- # encoding = AudioEncoding::FLAC
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # encoding = :FLAC
  # sample_rate_hertz = 44100
  # language_code = "en-US"
- # config = RecognitionConfig.new
- # config.encoding = encoding
- # config.sample_rate_hertz = sample_rate_hertz
- # config.language_code = language_code
+ # config = {
+ # encoding: encoding,
+ # sample_rate_hertz: sample_rate_hertz,
+ # language_code: language_code
+ # }
  # uri = "gs://bucket_name/file_name.flac"
- # audio = RecognitionAudio.new
- # audio.uri = uri
+ # audio = { uri: uri }
  # response = speech_client.recognize(config, audio)

  def recognize \
  config,
  audio,
  options: nil
- req = Google::Cloud::Speech::V1::RecognizeRequest.new({
+ req = {
  config: config,
  audio: audio
- }.delete_if { |_, v| v.nil? })
+ }.delete_if { |_, v| v.nil? }
+ req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::RecognizeRequest)
  @recognize.call(req, options)
  end

@@ -212,11 +212,15 @@ module Google
  # +Operation.error+ or an +Operation.response+ which contains
  # a +LongRunningRecognizeResponse+ message.
  #
- # @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+ # @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
  # *Required* Provides information to the recognizer that specifies how to
  # process the request.
- # @param audio [Google::Cloud::Speech::V1::RecognitionAudio]
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+ # can also be provided.
+ # @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
  # *Required* The audio data to be recognized.
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+ # can also be provided.
  # @param options [Google::Gax::CallOptions]
  # Overrides the default settings for this call, e.g, timeout,
  # retries, etc.
@@ -225,22 +229,17 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # AudioEncoding = Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding
- # RecognitionAudio = Google::Cloud::Speech::V1::RecognitionAudio
- # RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- #
- # speech_client = SpeechClient.new
- # encoding = AudioEncoding::FLAC
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # encoding = :FLAC
  # sample_rate_hertz = 44100
  # language_code = "en-US"
- # config = RecognitionConfig.new
- # config.encoding = encoding
- # config.sample_rate_hertz = sample_rate_hertz
- # config.language_code = language_code
+ # config = {
+ # encoding: encoding,
+ # sample_rate_hertz: sample_rate_hertz,
+ # language_code: language_code
+ # }
  # uri = "gs://bucket_name/file_name.flac"
- # audio = RecognitionAudio.new
- # audio.uri = uri
+ # audio = { uri: uri }
  #
  # # Register a callback during the method call.
  # operation = speech_client.long_running_recognize(config, audio) do |op|
@@ -273,10 +272,11 @@ module Google
  config,
  audio,
  options: nil
- req = Google::Cloud::Speech::V1::LongRunningRecognizeRequest.new({
+ req = {
  config: config,
  audio: audio
- }.delete_if { |_, v| v.nil? })
+ }.delete_if { |_, v| v.nil? }
+ req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::LongRunningRecognizeRequest)
  operation = Google::Gax::Operation.new(
  @long_running_recognize.call(req, options),
  @operations_client,
@@ -309,11 +309,8 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- # StreamingRecognizeRequest = Google::Cloud::Speech::V1::StreamingRecognizeRequest
- #
- # speech_client = SpeechClient.new
- # request = StreamingRecognizeRequest.new
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # request = {}
  # requests = [request]
  # speech_client.streaming_recognize(requests).each do |element|
  # # Process element.
@@ -16,7 +16,7 @@
  module Google
  module Cloud
  module Speech
- VERSION = "0.25.0"
+ VERSION = "0.26.0"
  end
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-speech
  version: !ruby/object:Gem::Version
- version: 0.25.0
+ version: 0.26.0
  platform: ruby
  authors:
  - Mike Moore
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-07-11 00:00:00.000000000 Z
+ date: 2017-07-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: google-cloud-core
@@ -179,6 +179,7 @@ files:
  - lib/google-cloud-speech.rb
  - lib/google/cloud/speech.rb
  - lib/google/cloud/speech/audio.rb
+ - lib/google/cloud/speech/convert.rb
  - lib/google/cloud/speech/credentials.rb
  - lib/google/cloud/speech/operation.rb
  - lib/google/cloud/speech/project.rb
@@ -190,6 +191,7 @@ files:
  - lib/google/cloud/speech/v1/cloud_speech_services_pb.rb
  - lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
  - lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
+ - lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
  - lib/google/cloud/speech/v1/doc/google/rpc/status.rb
  - lib/google/cloud/speech/v1/speech_client.rb
  - lib/google/cloud/speech/v1/speech_client_config.json