google-cloud-speech 0.25.0 → 0.26.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: ccecf5215ecbd7fd0e0ef074b7638711905b8450
- data.tar.gz: f4c270117ad7bc3611e435fb598b5dd6bcbc835c
+ metadata.gz: 454b1310e5e39c2b4ac676c26b6aea8a89f452c5
+ data.tar.gz: 12fa7c7129f20bdf92c5e61c3e1e294307937f1f
  SHA512:
- metadata.gz: d034fa22e135e05a08e14b09ffc2cb075a5cd7757f717a7635bfbd551539d853935670664c21cd636f1b2f2bd68da0d769f2747c8f7b0002b79628d49360197e
- data.tar.gz: 5b3101e327dbb801a311abc8cc0dcaf2ec3774cce69c22f8bd629fff62701bc9ab7b8717ef4b923dc0c95d0d4d3efb32642fd876d0d0dbe91cb4d99a0ed0fb0d
+ metadata.gz: dca3bf30a3edad90e5dec095183a4a721491b2cef886ecc82fa4a83afe6fdc3c5e1387b336f9d0a96286b6605dc58cead675242e8d04338b2c44c17896c9b3fd
+ data.tar.gz: 6ce650dbbeb8cccac3a3e63c290b191ff3c92436d65cb1a1887d6e3480ca4707dfd326017186f3da278f913e70057258d03469df503f9fcd0ce6c29a944e4fb5
@@ -191,6 +191,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Array<Result>] The transcribed text of audio recognized.
  #
@@ -209,14 +213,15 @@ module Google
  # result.transcript #=> "how old is the Brooklyn Bridge"
  # result.confidence #=> 0.9826789498329163
  #
- def recognize max_alternatives: nil, profanity_filter: nil, phrases: nil
+ def recognize max_alternatives: nil, profanity_filter: nil,
+ phrases: nil, words: nil
  ensure_speech!

  speech.recognize self, encoding: encoding, sample_rate: sample_rate,
  language: language,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases
+ phrases: phrases, words: words
  end

  ##
@@ -239,6 +244,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Operation] A resource represents the long-running,
  # asynchronous processing of a speech-recognition operation.
@@ -260,7 +269,7 @@ module Google
  # results = op.results
  #
  def process max_alternatives: nil, profanity_filter: nil,
- phrases: nil
+ phrases: nil, words: nil
  ensure_speech!

  speech.process self, encoding: encoding,
@@ -268,7 +277,7 @@ module Google
  language: language,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases
+ phrases: phrases, words: words
  end
  alias_method :long_running_recognize, :process
  alias_method :recognize_job, :process
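
For context, a minimal sketch of how the new `words` option above might be used through the handwritten `Audio#recognize` API. The file path and audio parameters are placeholders in the style of the gem's README examples; `words: true` flows through to `enable_word_time_offsets`, as shown later in this diff.

```rb
require "google/cloud/speech"

speech = Google::Cloud::Speech.new
audio  = speech.audio "path/to/audio.raw", encoding: :linear16,
                      language: "en-US", sample_rate: 16000

results = audio.recognize words: true
results.first.words.each do |word|
  # Each Word carries fractional-second offsets from the start of the audio.
  puts format("%-12s %.2fs..%.2fs", word.word, word.start_time, word.end_time)
end
```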
@@ -0,0 +1,46 @@
+ # Copyright 2017 Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "google/protobuf/duration_pb"
+
+ module Google
+ module Cloud
+ module Speech
+ ##
+ # @private Helper module for converting Speech values.
+ module Convert
+ module ClassMethods
+ def number_to_duration number
+ return nil if number.nil?
+
+ Google::Protobuf::Duration.new \
+ seconds: number.to_i,
+ nanos: (number.remainder(1) * 1000000000).round
+ end
+
+ def duration_to_number duration
+ return nil if duration.nil?
+
+ return duration.seconds if duration.nanos == 0
+
+ duration.seconds + (duration.nanos / 1000000000.0)
+ end
+ end
+
+ extend ClassMethods
+ end
+ end
+ end
+ end
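
The `Convert` helpers above are `@private` to the gem, but their behavior is easy to check in isolation. A rough illustration with arbitrary values:

```rb
require "google/cloud/speech/convert"

convert  = Google::Cloud::Speech::Convert
duration = convert.number_to_duration 1.5
duration.seconds #=> 1
duration.nanos   #=> 500000000

convert.duration_to_number duration #=> 1.5
# When nanos is zero the integer seconds value is returned as-is.
convert.duration_to_number Google::Protobuf::Duration.new(seconds: 2) #=> 2
```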
@@ -266,6 +266,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Array<Result>] The transcribed text of audio recognized.
  #
@@ -308,7 +312,8 @@ module Google
  # max_alternatives: 10
  #
  def recognize source, encoding: nil, language: nil, sample_rate: nil,
- max_alternatives: nil, profanity_filter: nil, phrases: nil
+ max_alternatives: nil, profanity_filter: nil,
+ phrases: nil, words: nil
  ensure_service!

  audio_obj = audio source, encoding: encoding, language: language,
@@ -317,7 +322,8 @@ module Google
  config = audio_config(
  encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
  language: audio_obj.language, max_alternatives: max_alternatives,
- profanity_filter: profanity_filter, phrases: phrases)
+ profanity_filter: profanity_filter, phrases: phrases,
+ words: words)

  grpc = service.recognize_sync audio_obj.to_grpc, config
  grpc.results.map do |result_grpc|
@@ -388,6 +394,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Operation] A resource represents the long-running,
  # asynchronous processing of a speech-recognition operation.
@@ -440,7 +450,8 @@ module Google
  # op.reload!
  #
  def process source, encoding: nil, sample_rate: nil, language: nil,
- max_alternatives: nil, profanity_filter: nil, phrases: nil
+ max_alternatives: nil, profanity_filter: nil, phrases: nil,
+ words: nil
  ensure_service!

  audio_obj = audio source, encoding: encoding, language: language,
@@ -449,7 +460,8 @@ module Google
  config = audio_config(
  encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
  language: audio_obj.language, max_alternatives: max_alternatives,
- profanity_filter: profanity_filter, phrases: phrases)
+ profanity_filter: profanity_filter, phrases: phrases,
+ words: words)

  grpc = service.recognize_async audio_obj.to_grpc, config
  Operation.from_grpc grpc
@@ -513,6 +525,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  # @param [Boolean] utterance When `true`, the service will perform
  # continuous recognition (continuing to process audio even if the user
  # pauses speaking) until the client closes the output stream (gRPC
@@ -550,7 +566,7 @@ module Google
  #
  def stream encoding: nil, language: nil, sample_rate: nil,
  max_alternatives: nil, profanity_filter: nil, phrases: nil,
- utterance: nil, interim: nil
+ words: nil, utterance: nil, interim: nil
  ensure_service!

  grpc_req = V1::StreamingRecognizeRequest.new(
@@ -561,7 +577,7 @@ module Google
  sample_rate: sample_rate,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases),
+ phrases: phrases, words: words),
  single_utterance: utterance,
  interim_results: interim
  }.delete_if { |_, v| v.nil? }
@@ -608,7 +624,7 @@ module Google

  def audio_config encoding: nil, language: nil, sample_rate: nil,
  max_alternatives: nil, profanity_filter: nil,
- phrases: nil
+ phrases: nil, words: nil
  contexts = nil
  contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
  language = String(language) unless language.nil?
@@ -618,7 +634,8 @@ module Google
  sample_rate_hertz: sample_rate,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- speech_contexts: contexts
+ speech_contexts: contexts,
+ enable_word_time_offsets: words
  }.delete_if { |_, v| v.nil? })
  end

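
Putting the Project-level plumbing above together: a sketch of a synchronous request with word offsets enabled. The bucket and object names are placeholders, and the `words: true` argument becomes `enable_word_time_offsets: true` in the generated `RecognitionConfig`.

```rb
require "google/cloud/speech"

speech = Google::Cloud::Speech.new

results = speech.recognize "gs://my-bucket/my-audio.flac",
                           encoding: :flac, language: "en-US",
                           sample_rate: 44100, words: true

results.each do |result|
  puts result.transcript
  puts result.words.map(&:word).inspect
end
```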
@@ -14,6 +14,7 @@


  require "google/cloud/speech/v1"
+ require "google/cloud/speech/convert"

  module Google
  module Cloud
@@ -35,6 +36,10 @@ module Google
  # recognition is correct. This field is typically provided only for the
  # top hypothesis. A value of 0.0 is a sentinel value indicating
  # confidence was not set.
+ # @attr_reader [Array<Result::Word>] words A list of words with additional
+ # information about each word. Currently, the only additional
+ # information provided is the start and end time offsets. Available
+ # when using the `words` argument in relevant methods.
  # @attr_reader [Array<Result::Alternative>] alternatives Additional
  # recognition hypotheses (up to the value specified in
  # `max_alternatives`). The server may return fewer than
@@ -56,13 +61,14 @@ module Google
  # result.confidence #=> 0.9826789498329163
  #
  class Result
- attr_reader :transcript, :confidence, :alternatives
+ attr_reader :transcript, :confidence, :words, :alternatives

  ##
  # @private Creates a new Results instance.
- def initialize transcript, confidence, alternatives = []
- @transcript = transcript
- @confidence = confidence
+ def initialize transcript, confidence, words = [], alternatives = []
+ @transcript = transcript
+ @confidence = confidence
+ @words = words
  @alternatives = alternatives
  end

@@ -71,10 +77,42 @@ module Google
  def self.from_grpc grpc
  head, *tail = *grpc.alternatives
  return nil if head.nil?
+ words = Array(head.words).map do |w|
+ Word.new w.word, Convert.duration_to_number(w.start_time),
+ Convert.duration_to_number(w.end_time)
+ end
  alternatives = tail.map do |alt|
  Alternative.new alt.transcript, alt.confidence
  end
- new head.transcript, head.confidence, alternatives
+ new head.transcript, head.confidence, words, alternatives
+ end
+
+ ##
+ # Word-specific information for recognized words. Currently, the only
+ # additional information provided is the start and end time offsets.
+ # Available when using the `words` argument in relevant methods.
+ #
+ # @attr_reader [String] word The word corresponding to this set of
+ # information.
+ # @attr_reader [Numeric] start_time Time offset relative to the
+ # beginning of the audio, and corresponding to the start of the spoken
+ # word. This field is only set if `words` was specified. This is an
+ # experimental feature and the accuracy of the time offset can vary.
+ # @attr_reader [Numeric] end_time Time offset relative to the
+ # beginning of the audio, and corresponding to the end of the spoken
+ # word. This field is only set if `words` was specified. This is an
+ # experimental feature and the accuracy of the time offset can vary.
+ class Word
+ attr_reader :word, :start_time, :end_time
+ alias_method :to_str, :word
+
+ ##
+ # @private Creates a new Result::Word instance.
+ def initialize word, start_time, end_time
+ @word = word
+ @start_time = start_time
+ @end_time = end_time
+ end
  end

  ##
@@ -114,7 +152,7 @@ module Google
  ##
  # @private Creates a new Result::Alternative instance.
  def initialize transcript, confidence
- @transcript = transcript
+ @transcript = transcript
  @confidence = confidence
  end
  end
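
A small illustration of the new `Result#words` accessor and the `to_str` alias defined above; `result` stands for any `Result` obtained from a `words: true` request:

```rb
# result is a Google::Cloud::Speech::Result from a words: true request.
result.words.each do |word|
  printf "%6.2fs - %6.2fs  %s\n", word.start_time, word.end_time, word.word
end

# Because Word aliases to_str to word, a Word can stand in for a String:
greeting = "First word: " + result.words.first
```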
@@ -1,4 +1,5 @@
  # Copyright 2017, Google Inc. All rights reserved.
+ #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
@@ -11,4 +12,71 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ module Google
+ module Cloud
+ # rubocop:disable LineLength
+
+ ##
+ # # Ruby Client for Google Cloud Speech API ([Alpha](https://github.com/GoogleCloudPlatform/google-cloud-ruby#versioning))
+ #
+ # [Google Cloud Speech API][Product Documentation]: Google Cloud Speech API.
+ # - [Product Documentation][]
+ #
+ # ## Quick Start
+ # In order to use this library, you first need to go through the following steps:
+ #
+ # 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
+ # 2. [Enable the Google Cloud Speech API.](https://console.cloud.google.com/apis/api/speech)
+ # 3. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
+ #
+ # ### Installation
+ # ```
+ # $ gem install google-cloud-speech
+ # ```
+ #
+ # ### Preview
+ # #### SpeechClient
+ # ```rb
+ # require "google/cloud/speech/v1"
+ #
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # language_code = "en-US"
+ # sample_rate_hertz = 44100
+ # encoding = :FLAC
+ # config = {
+ # language_code: language_code,
+ # sample_rate_hertz: sample_rate_hertz,
+ # encoding: encoding
+ # }
+ # uri = "gs://gapic-toolkit/hello.flac"
+ # audio = { uri: uri }
+ # response = speech_client.recognize(config, audio)
+ # ```
+ #
+ # ### Next Steps
+ # - Read the [Google Cloud Speech API Product documentation][Product Documentation] to learn more about the product and see How-to Guides.
+ # - View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/README.md) to see the full list of Cloud APIs that we cover.
+ #
+ # [Product Documentation]: https://cloud.google.com/speech
+ #
+ module Speech
+ # rubocop:enable LineLength
+
+ ##
+ # # Google Cloud Speech API Contents
+ #
+ # | Class | Description |
+ # | ----- | ----------- |
+ # | [SpeechClient][] | Google Cloud Speech API. |
+ # | [Data Types][] | Data types for Google::Cloud::Speech::V1 |
+ #
+ # [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/speechclient
+ # [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/datatypes
+ #
+ module V1
+ end
+ end
+ end
+ end
+
  require "google/cloud/speech/v1/speech_client"
@@ -36,6 +36,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
  optional :max_alternatives, :int32, 4
  optional :profanity_filter, :bool, 5
  repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
+ optional :enable_word_time_offsets, :bool, 8
  end
  add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
  value :ENCODING_UNSPECIFIED, 0
@@ -87,6 +88,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
  add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
  optional :transcript, :string, 1
  optional :confidence, :float, 2
+ repeated :words, :message, 3, "google.cloud.speech.v1.WordInfo"
+ end
+ add_message "google.cloud.speech.v1.WordInfo" do
+ optional :start_time, :message, 1, "google.protobuf.Duration"
+ optional :end_time, :message, 2, "google.protobuf.Duration"
+ optional :word, :string, 3
  end
  end

@@ -110,6 +117,7 @@ module Google
  StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
  SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
  SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
+ WordInfo = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.WordInfo").msgclass
  end
  end
  end
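
For reference, the generated `WordInfo` message registered above can be built and inspected directly from Ruby; the values here are arbitrary:

```rb
require "google/protobuf/duration_pb"
require "google/cloud/speech/v1/cloud_speech_pb"

word = Google::Cloud::Speech::V1::WordInfo.new(
  word:       "bridge",
  start_time: Google::Protobuf::Duration.new(seconds: 1, nanos: 200_000_000),
  end_time:   Google::Protobuf::Duration.new(seconds: 1, nanos: 900_000_000)
)

word.word             #=> "bridge"
word.end_time.seconds #=> 1
word.end_time.nanos   #=> 900000000
```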
@@ -56,7 +56,7 @@ module Google
  # +audio_content+ data. The audio bytes must be encoded as specified in
  # +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
  # pure binary representation (not base64). See
- # {audio limits}[https://cloud.google.com/speech/limits#content].
+ # [audio limits](https://cloud.google.com/speech/limits#content).
  class StreamingRecognizeRequest; end

  # Provides information to the recognizer that specifies how to process the
@@ -101,9 +101,9 @@ module Google
  # @!attribute [rw] language_code
  # @return [String]
  # *Required* The language of the supplied audio as a
- # {BCP-47}[https://www.rfc-editor.org/rfc/bcp/bcp47.txt] language tag.
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  # Example: "en-US".
- # See {Language Support}[https://cloud.google.com/speech/docs/languages]
+ # See [Language Support](https://cloud.google.com/speech/docs/languages)
  # for a list of the currently supported language codes.
  # @!attribute [rw] max_alternatives
  # @return [Integer]
@@ -122,11 +122,17 @@ module Google
  # @!attribute [rw] speech_contexts
  # @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
  # *Optional* A means to provide context to assist the speech recognition.
+ # @!attribute [rw] enable_word_time_offsets
+ # @return [true, false]
+ # *Optional* If +true+, the top result includes a list of words and
+ # the start and end time offsets (timestamps) for those words. If
+ # +false+, no word-level time offset information is returned. The default is
+ # +false+.
  class RecognitionConfig
  # Audio encoding of the data sent in the audio message. All encodings support
- # only 1 channel (mono) audio. Only +FLAC+ includes a header that describes
- # the bytes of audio that follow the header. The other encodings are raw
- # audio bytes with no header.
+ # only 1 channel (mono) audio. Only +FLAC+ and +WAV+ include a header that
+ # describes the bytes of audio that follow the header. The other encodings
+ # are raw audio bytes with no header.
  #
  # For best results, the audio source should be captured and transmitted using
  # a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
@@ -134,13 +140,13 @@ module Google
  # this section, are used to capture or transmit the audio, particularly if
  # background noise is present.
  module AudioEncoding
- # Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
+ # Not specified. Will return result {Google::Rpc::Code::INVALID_ARGUMENT}.
  ENCODING_UNSPECIFIED = 0

  # Uncompressed 16-bit signed little-endian samples (Linear PCM).
  LINEAR16 = 1

- # {+FLAC+}[https://xiph.org/flac/documentation.html] (Free Lossless Audio
+ # [+FLAC+](https://xiph.org/flac/documentation.html) (Free Lossless Audio
  # Codec) is the recommended encoding because it is
  # lossless--therefore recognition is not compromised--and
  # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
@@ -158,17 +164,17 @@ module Google
  AMR_WB = 5

  # Opus encoded audio frames in Ogg container
- # ({OggOpus}[https://wiki.xiph.org/OggOpus]).
+ # ([OggOpus](https://wiki.xiph.org/OggOpus)).
  # +sample_rate_hertz+ must be 16000.
  OGG_OPUS = 6

  # Although the use of lossy encodings is not recommended, if a very low
  # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
- # Speex encoding. The {Speex}[https://speex.org/] encoding supported by
+ # Speex encoding. The [Speex](https://speex.org/) encoding supported by
  # Cloud Speech API has a header byte in each block, as in MIME type
  # +audio/x-speex-with-header-byte+.
  # It is a variant of the RTP Speex encoding defined in
- # {RFC 5574}[https://tools.ietf.org/html/rfc5574].
+ # [RFC 5574](https://tools.ietf.org/html/rfc5574).
  # The stream is a sequence of blocks, one block per RTP packet. Each block
  # starts with a byte containing the length of the block, in bytes, followed
  # by one or more frames of Speex data, padded to an integral number of
@@ -188,13 +194,13 @@ module Google
  # to improve the accuracy for specific words and phrases, for example, if
  # specific commands are typically spoken by the user. This can also be used
  # to add additional words to the vocabulary of the recognizer. See
- # {usage limits}[https://cloud.google.com/speech/limits#content].
+ # [usage limits](https://cloud.google.com/speech/limits#content).
  class SpeechContext; end

  # Contains audio data in the encoding specified in the +RecognitionConfig+.
  # Either +content+ or +uri+ must be supplied. Supplying both or neither
- # returns Google::Rpc::Code::INVALID_ARGUMENT. See
- # {audio limits}[https://cloud.google.com/speech/limits#content].
+ # returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
+ # [audio limits](https://cloud.google.com/speech/limits#content).
  # @!attribute [rw] content
  # @return [String]
  # The audio data bytes encoded as specified in
@@ -206,8 +212,8 @@ module Google
  # +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
  # supported, which must be specified in the following format:
  # +gs://bucket_name/object_name+ (other URI formats return
- # Google::Rpc::Code::INVALID_ARGUMENT). For more information, see
- # {Request URIs}[https://cloud.google.com/storage/docs/reference-uris].
+ # {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
+ # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
  class RecognitionAudio; end

  # The only message returned to the client by the +Recognize+ method. It
@@ -269,34 +275,32 @@ module Google
  # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
  # results { alternatives { transcript: " the question" } stability: 0.01 }
  #
- # 7. speech_event_type: END_OF_SINGLE_UTTERANCE
- #
- # 8. results { alternatives { transcript: " that is the question"
+ # 7. results { alternatives { transcript: " that is the question"
  # confidence: 0.98 }
  # alternatives { transcript: " that was the question" }
  # is_final: true }
  #
  # Notes:
  #
- # - Only two of the above responses #4 and #8 contain final results; they are
+ # * Only two of the above responses #4 and #7 contain final results; they are
  # indicated by +is_final: true+. Concatenating these together generates the
  # full transcript: "to be or not to be that is the question".
  #
- # - The others contain interim +results+. #3 and #6 contain two interim
+ # * The others contain interim +results+. #3 and #6 contain two interim
  # +results+: the first portion has a high stability and is less likely to
  # change; the second portion has a low stability and is very likely to
  # change. A UI designer might choose to show only high stability +results+.
  #
- # - The specific +stability+ and +confidence+ values shown above are only for
+ # * The specific +stability+ and +confidence+ values shown above are only for
  # illustrative purposes. Actual values may vary.
  #
- # - In each response, only one of these fields will be set:
- # +error+,
- # +speech_event_type+, or
- # one or more (repeated) +results+.
+ # * In each response, only one of these fields will be set:
+ # +error+,
+ # +speech_event_type+, or
+ # one or more (repeated) +results+.
  # @!attribute [rw] error
  # @return [Google::Rpc::Status]
- # *Output-only* If set, returns a Google::Rpc::Status message that
+ # *Output-only* If set, returns a {Google::Rpc::Status} message that
  # specifies the error for the operation.
  # @!attribute [rw] results
  # @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
@@ -351,6 +355,8 @@ module Google
  # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
  # *Output-only* May contain one or more recognition hypotheses (up to the
  # maximum specified in +max_alternatives+).
+ # These alternatives are ordered in terms of accuracy, with the top (first)
+ # alternative being the most probable, as ranked by the recognizer.
  class SpeechRecognitionResult; end

  # Alternative hypotheses (a.k.a. n-best list).
@@ -363,10 +369,36 @@ module Google
  # indicates an estimated greater likelihood that the recognized words are
  # correct. This field is typically provided only for the top hypothesis, and
  # only for +is_final=true+ results. Clients should not rely on the
- # +confidence+ field as it is not guaranteed to be accurate, or even set, in
- # any of the results.
+ # +confidence+ field as it is not guaranteed to be accurate or consistent.
  # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
+ # @!attribute [rw] words
+ # @return [Array<Google::Cloud::Speech::V1::WordInfo>]
+ # *Output-only* A list of word-specific information for each recognized word.
  class SpeechRecognitionAlternative; end
+
+ # Word-specific information for recognized words. Word information is only
+ # included in the response when certain request parameters are set, such
+ # as +enable_word_time_offsets+.
+ # @!attribute [rw] start_time
+ # @return [Google::Protobuf::Duration]
+ # *Output-only* Time offset relative to the beginning of the audio,
+ # and corresponding to the start of the spoken word.
+ # This field is only set if +enable_word_time_offsets=true+ and only
+ # in the top hypothesis.
+ # This is an experimental feature and the accuracy of the time offset can
+ # vary.
+ # @!attribute [rw] end_time
+ # @return [Google::Protobuf::Duration]
+ # *Output-only* Time offset relative to the beginning of the audio,
+ # and corresponding to the end of the spoken word.
+ # This field is only set if +enable_word_time_offsets=true+ and only
+ # in the top hypothesis.
+ # This is an experimental feature and the accuracy of the time offset can
+ # vary.
+ # @!attribute [rw] word
+ # @return [String]
+ # *Output-only* The word corresponding to this set of information.
+ class WordInfo; end
  end
  end
  end
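
Tying the documented `enable_word_time_offsets` flag to the `WordInfo` fields above, a sketch against the low-level V1 client; the bucket and object names are placeholders:

```rb
require "google/cloud/speech/v1"

speech_client = Google::Cloud::Speech::V1::SpeechClient.new
config = {
  encoding: :FLAC,
  sample_rate_hertz: 44100,
  language_code: "en-US",
  enable_word_time_offsets: true
}
audio = { uri: "gs://bucket_name/file_name.flac" }

response = speech_client.recognize(config, audio)
response.results.each do |result|
  result.alternatives.first.words.each do |info|
    # start_time and end_time are Google::Protobuf::Duration messages.
    start = info.start_time.seconds + info.start_time.nanos / 1e9
    puts "#{info.word} @ #{start}s"
  end
end
```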
@@ -77,7 +77,7 @@ module Google
  # If the embedded message type is well-known and has a custom JSON
  # representation, that representation will be embedded adding a field
  # +value+ which holds the custom JSON in addition to the +@type+
- # field. Example (for message Google::Protobuf::Duration):
+ # field. Example (for message {Google::Protobuf::Duration}):
  #
  # {
  # "@type": "type.googleapis.com/google.protobuf.Duration",
@@ -96,7 +96,7 @@ module Google
  # qualified name of the type (as in +path/google.protobuf.Duration+).
  # The name should be in a canonical form (e.g., leading "." is
  # not accepted).
- # * An HTTP GET on the URL must yield a Google::Protobuf::Type
+ # * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
  # value in binary format, or produce an error.
  # * Applications are allowed to cache lookup results based on the
  # URL, or have them precompiled into a binary to avoid any
@@ -0,0 +1,77 @@
+ # Copyright 2017, Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ module Google
+ module Protobuf
+ # A Duration represents a signed, fixed-length span of time represented
+ # as a count of seconds and fractions of seconds at nanosecond
+ # resolution. It is independent of any calendar and concepts like "day"
+ # or "month". It is related to Timestamp in that the difference between
+ # two Timestamp values is a Duration and it can be added or subtracted
+ # from a Timestamp. Range is approximately +-10,000 years.
+ #
+ # Example 1: Compute Duration from two Timestamps in pseudo code.
+ #
+ # Timestamp start = ...;
+ # Timestamp end = ...;
+ # Duration duration = ...;
+ #
+ # duration.seconds = end.seconds - start.seconds;
+ # duration.nanos = end.nanos - start.nanos;
+ #
+ # if (duration.seconds < 0 && duration.nanos > 0) {
+ # duration.seconds += 1;
+ # duration.nanos -= 1000000000;
+ # } else if (duration.seconds > 0 && duration.nanos < 0) {
+ # duration.seconds -= 1;
+ # duration.nanos += 1000000000;
+ # }
+ #
+ # Example 2: Compute Timestamp from Timestamp + Duration in pseudo code.
+ #
+ # Timestamp start = ...;
+ # Duration duration = ...;
+ # Timestamp end = ...;
+ #
+ # end.seconds = start.seconds + duration.seconds;
+ # end.nanos = start.nanos + duration.nanos;
+ #
+ # if (end.nanos < 0) {
+ # end.seconds -= 1;
+ # end.nanos += 1000000000;
+ # } else if (end.nanos >= 1000000000) {
+ # end.seconds += 1;
+ # end.nanos -= 1000000000;
+ # }
+ #
+ # Example 3: Compute Duration from datetime.timedelta in Python.
+ #
+ # td = datetime.timedelta(days=3, minutes=10)
+ # duration = Duration()
+ # duration.FromTimedelta(td)
+ # @!attribute [rw] seconds
+ # @return [Integer]
+ # Signed seconds of the span of time. Must be from -315,576,000,000
+ # to +315,576,000,000 inclusive.
+ # @!attribute [rw] nanos
+ # @return [Integer]
+ # Signed fractions of a second at nanosecond resolution of the span
+ # of time. Durations less than one second are represented with a 0
+ # +seconds+ field and a positive or negative +nanos+ field. For durations
+ # of one second or more, a non-zero value for the +nanos+ field must be
+ # of the same sign as the +seconds+ field. Must be from -999,999,999
+ # to +999,999,999 inclusive.
+ class Duration; end
+ end
+ end
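
In Ruby terms, the two fields documented above combine into a fractional number of seconds:

```rb
require "google/protobuf/duration_pb"

duration = Google::Protobuf::Duration.new seconds: 3, nanos: 250_000_000
duration.seconds + duration.nanos / 1e9 #=> 3.25
```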
@@ -16,16 +16,16 @@ module Google
  module Rpc
  # The +Status+ type defines a logical error model that is suitable for different
  # programming environments, including REST APIs and RPC APIs. It is used by
- # {gRPC}[https://github.com/grpc]. The error model is designed to be:
+ # [gRPC](https://github.com/grpc). The error model is designed to be:
  #
- # - Simple to use and understand for most users
- # - Flexible enough to meet unexpected needs
+ # * Simple to use and understand for most users
+ # * Flexible enough to meet unexpected needs
  #
  # = Overview
  #
  # The +Status+ message contains three pieces of data: error code, error message,
  # and error details. The error code should be an enum value of
- # Google::Rpc::Code, but it may accept additional error codes if needed. The
+ # {Google::Rpc::Code}, but it may accept additional error codes if needed. The
  # error message should be a developer-facing English message that helps
  # developers *understand* and *resolve* the error. If a localized user-facing
  # error message is needed, put the localized message in the error details or
@@ -49,31 +49,31 @@ module Google
  #
  # Example uses of this error model include:
  #
- # - Partial errors. If a service needs to return partial errors to the client,
- # it may embed the +Status+ in the normal response to indicate the partial
- # errors.
+ # * Partial errors. If a service needs to return partial errors to the client,
+ # it may embed the +Status+ in the normal response to indicate the partial
+ # errors.
  #
- # - Workflow errors. A typical workflow has multiple steps. Each step may
- # have a +Status+ message for error reporting purpose.
+ # * Workflow errors. A typical workflow has multiple steps. Each step may
+ # have a +Status+ message for error reporting purpose.
  #
- # - Batch operations. If a client uses batch request and batch response, the
- # +Status+ message should be used directly inside batch response, one for
- # each error sub-response.
+ # * Batch operations. If a client uses batch request and batch response, the
+ # +Status+ message should be used directly inside batch response, one for
+ # each error sub-response.
  #
- # - Asynchronous operations. If an API call embeds asynchronous operation
- # results in its response, the status of those operations should be
- # represented directly using the +Status+ message.
+ # * Asynchronous operations. If an API call embeds asynchronous operation
+ # results in its response, the status of those operations should be
+ # represented directly using the +Status+ message.
  #
- # - Logging. If some API errors are stored in logs, the message +Status+ could
- # be used directly after any stripping needed for security/privacy reasons.
+ # * Logging. If some API errors are stored in logs, the message +Status+ could
+ # be used directly after any stripping needed for security/privacy reasons.
  # @!attribute [rw] code
  # @return [Integer]
- # The status code, which should be an enum value of Google::Rpc::Code.
+ # The status code, which should be an enum value of {Google::Rpc::Code}.
  # @!attribute [rw] message
  # @return [String]
  # A developer-facing error message, which should be in English. Any
  # user-facing error message should be localized and sent in the
- # Google::Rpc::Status#details field, or localized by the client.
+ # {Google::Rpc::Status#details} field, or localized by the client.
  # @!attribute [rw] details
  # @return [Array<Google::Protobuf::Any>]
  # A list of messages that carry the error details. There will be a
@@ -165,11 +165,15 @@ module Google
  # Performs synchronous speech recognition: receive results after all audio
  # has been sent and processed.
  #
- # @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+ # @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
  # *Required* Provides information to the recognizer that specifies how to
  # process the request.
- # @param audio [Google::Cloud::Speech::V1::RecognitionAudio]
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+ # can also be provided.
+ # @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
  # *Required* The audio data to be recognized.
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+ # can also be provided.
  # @param options [Google::Gax::CallOptions]
  # Overrides the default settings for this call, e.g., timeout,
  # retries, etc.
@@ -178,32 +182,28 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # AudioEncoding = Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding
- # RecognitionAudio = Google::Cloud::Speech::V1::RecognitionAudio
- # RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- #
- # speech_client = SpeechClient.new
- # encoding = AudioEncoding::FLAC
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # encoding = :FLAC
  # sample_rate_hertz = 44100
  # language_code = "en-US"
- # config = RecognitionConfig.new
- # config.encoding = encoding
- # config.sample_rate_hertz = sample_rate_hertz
- # config.language_code = language_code
+ # config = {
+ # encoding: encoding,
+ # sample_rate_hertz: sample_rate_hertz,
+ # language_code: language_code
+ # }
  # uri = "gs://bucket_name/file_name.flac"
- # audio = RecognitionAudio.new
- # audio.uri = uri
+ # audio = { uri: uri }
  # response = speech_client.recognize(config, audio)

  def recognize \
  config,
  audio,
  options: nil
- req = Google::Cloud::Speech::V1::RecognizeRequest.new({
+ req = {
  config: config,
  audio: audio
- }.delete_if { |_, v| v.nil? })
+ }.delete_if { |_, v| v.nil? }
+ req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::RecognizeRequest)
  @recognize.call(req, options)
  end

@@ -212,11 +212,15 @@ module Google
  # +Operation.error+ or an +Operation.response+ which contains
  # a +LongRunningRecognizeResponse+ message.
  #
- # @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+ # @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
  # *Required* Provides information to the recognizer that specifies how to
  # process the request.
- # @param audio [Google::Cloud::Speech::V1::RecognitionAudio]
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+ # can also be provided.
+ # @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
  # *Required* The audio data to be recognized.
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+ # can also be provided.
  # @param options [Google::Gax::CallOptions]
  # Overrides the default settings for this call, e.g., timeout,
  # retries, etc.
@@ -225,22 +229,17 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # AudioEncoding = Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding
- # RecognitionAudio = Google::Cloud::Speech::V1::RecognitionAudio
- # RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- #
- # speech_client = SpeechClient.new
- # encoding = AudioEncoding::FLAC
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # encoding = :FLAC
  # sample_rate_hertz = 44100
  # language_code = "en-US"
- # config = RecognitionConfig.new
- # config.encoding = encoding
- # config.sample_rate_hertz = sample_rate_hertz
- # config.language_code = language_code
+ # config = {
+ # encoding: encoding,
+ # sample_rate_hertz: sample_rate_hertz,
+ # language_code: language_code
+ # }
  # uri = "gs://bucket_name/file_name.flac"
- # audio = RecognitionAudio.new
- # audio.uri = uri
+ # audio = { uri: uri }
  #
  # # Register a callback during the method call.
  # operation = speech_client.long_running_recognize(config, audio) do |op|
@@ -273,10 +272,11 @@ module Google
  config,
  audio,
  options: nil
- req = Google::Cloud::Speech::V1::LongRunningRecognizeRequest.new({
+ req = {
  config: config,
  audio: audio
- }.delete_if { |_, v| v.nil? })
+ }.delete_if { |_, v| v.nil? }
+ req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::LongRunningRecognizeRequest)
  operation = Google::Gax::Operation.new(
  @long_running_recognize.call(req, options),
  @operations_client,
@@ -309,11 +309,8 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- # StreamingRecognizeRequest = Google::Cloud::Speech::V1::StreamingRecognizeRequest
- #
- # speech_client = SpeechClient.new
- # request = StreamingRecognizeRequest.new
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # request = {}
  # requests = [request]
  # speech_client.streaming_recognize(requests).each do |element|
  # # Process element.
@@ -16,7 +16,7 @@
  module Google
  module Cloud
  module Speech
- VERSION = "0.25.0"
+ VERSION = "0.26.0"
  end
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-speech
  version: !ruby/object:Gem::Version
- version: 0.25.0
+ version: 0.26.0
  platform: ruby
  authors:
  - Mike Moore
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-07-11 00:00:00.000000000 Z
+ date: 2017-07-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: google-cloud-core
@@ -179,6 +179,7 @@ files:
  - lib/google-cloud-speech.rb
  - lib/google/cloud/speech.rb
  - lib/google/cloud/speech/audio.rb
+ - lib/google/cloud/speech/convert.rb
  - lib/google/cloud/speech/credentials.rb
  - lib/google/cloud/speech/operation.rb
  - lib/google/cloud/speech/project.rb
@@ -190,6 +191,7 @@ files:
  - lib/google/cloud/speech/v1/cloud_speech_services_pb.rb
  - lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
  - lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
+ - lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
  - lib/google/cloud/speech/v1/doc/google/rpc/status.rb
  - lib/google/cloud/speech/v1/speech_client.rb
  - lib/google/cloud/speech/v1/speech_client_config.json