google-cloud-speech 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: ccecf5215ecbd7fd0e0ef074b7638711905b8450
- data.tar.gz: f4c270117ad7bc3611e435fb598b5dd6bcbc835c
+ metadata.gz: 454b1310e5e39c2b4ac676c26b6aea8a89f452c5
+ data.tar.gz: 12fa7c7129f20bdf92c5e61c3e1e294307937f1f
  SHA512:
- metadata.gz: d034fa22e135e05a08e14b09ffc2cb075a5cd7757f717a7635bfbd551539d853935670664c21cd636f1b2f2bd68da0d769f2747c8f7b0002b79628d49360197e
- data.tar.gz: 5b3101e327dbb801a311abc8cc0dcaf2ec3774cce69c22f8bd629fff62701bc9ab7b8717ef4b923dc0c95d0d4d3efb32642fd876d0d0dbe91cb4d99a0ed0fb0d
+ metadata.gz: dca3bf30a3edad90e5dec095183a4a721491b2cef886ecc82fa4a83afe6fdc3c5e1387b336f9d0a96286b6605dc58cead675242e8d04338b2c44c17896c9b3fd
+ data.tar.gz: 6ce650dbbeb8cccac3a3e63c290b191ff3c92436d65cb1a1887d6e3480ca4707dfd326017186f3da278f913e70057258d03469df503f9fcd0ce6c29a944e4fb5
@@ -191,6 +191,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Array<Result>] The transcribed text of audio recognized.
  #
@@ -209,14 +213,15 @@ module Google
  # result.transcript #=> "how old is the Brooklyn Bridge"
  # result.confidence #=> 0.9826789498329163
  #
- def recognize max_alternatives: nil, profanity_filter: nil, phrases: nil
+ def recognize max_alternatives: nil, profanity_filter: nil,
+ phrases: nil, words: nil
  ensure_speech!

  speech.recognize self, encoding: encoding, sample_rate: sample_rate,
  language: language,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases
+ phrases: phrases, words: words
  end

  ##
@@ -239,6 +244,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Operation] A resource represents the long-running,
  # asynchronous processing of a speech-recognition operation.
@@ -260,7 +269,7 @@ module Google
  # results = op.results
  #
  def process max_alternatives: nil, profanity_filter: nil,
- phrases: nil
+ phrases: nil, words: nil
  ensure_speech!

  speech.process self, encoding: encoding,
@@ -268,7 +277,7 @@ module Google
  language: language,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases
+ phrases: phrases, words: words
  end
  alias_method :long_running_recognize, :process
  alias_method :recognize_job, :process
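For context, a minimal sketch of how the new `words` argument might be used from the handwritten client, based only on the documentation added above; the file path and audio settings are hypothetical placeholders:

```ruby
require "google/cloud/speech"

speech = Google::Cloud::Speech.new
audio  = speech.audio "path/to/audio.raw", encoding: :linear16,
                      language: "en-US", sample_rate: 16000

# Ask for word-level time offsets along with the transcript.
results = audio.recognize words: true
results.first.words.each do |word|
  puts "#{word.word}: #{word.start_time}s - #{word.end_time}s"
end
```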
@@ -0,0 +1,46 @@
+ # Copyright 2017 Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "google/protobuf/duration_pb"
+
+ module Google
+ module Cloud
+ module Speech
+ ##
+ # @private Helper module for converting Speech values.
+ module Convert
+ module ClassMethods
+ def number_to_duration number
+ return nil if number.nil?
+
+ Google::Protobuf::Duration.new \
+ seconds: number.to_i,
+ nanos: (number.remainder(1) * 1000000000).round
+ end
+
+ def duration_to_number duration
+ return nil if duration.nil?
+
+ return duration.seconds if duration.nanos == 0
+
+ duration.seconds + (duration.nanos / 1000000000.0)
+ end
+ end
+
+ extend ClassMethods
+ end
+ end
+ end
+ end
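A quick illustration of the conversion helpers added above (note the module is marked `@private`, so this round-trip is shown only to explain the behavior, not as public API):

```ruby
require "google/cloud/speech/convert"

# A plain Numeric becomes a protobuf Duration...
duration = Google::Cloud::Speech::Convert.number_to_duration 1.4
duration.seconds #=> 1
duration.nanos   #=> 400000000

# ...and a Duration converts back to a plain Numeric.
Google::Cloud::Speech::Convert.duration_to_number duration #=> 1.4
```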
@@ -266,6 +266,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Array<Result>] The transcribed text of audio recognized.
  #
@@ -308,7 +312,8 @@ module Google
  # max_alternatives: 10
  #
  def recognize source, encoding: nil, language: nil, sample_rate: nil,
- max_alternatives: nil, profanity_filter: nil, phrases: nil
+ max_alternatives: nil, profanity_filter: nil,
+ phrases: nil, words: nil
  ensure_service!

  audio_obj = audio source, encoding: encoding, language: language,
@@ -317,7 +322,8 @@ module Google
  config = audio_config(
  encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
  language: audio_obj.language, max_alternatives: max_alternatives,
- profanity_filter: profanity_filter, phrases: phrases)
+ profanity_filter: profanity_filter, phrases: phrases,
+ words: words)

  grpc = service.recognize_sync audio_obj.to_grpc, config
  grpc.results.map do |result_grpc|
@@ -388,6 +394,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  #
  # @return [Operation] A resource represents the long-running,
  # asynchronous processing of a speech-recognition operation.
@@ -440,7 +450,8 @@ module Google
  # op.reload!
  #
  def process source, encoding: nil, sample_rate: nil, language: nil,
- max_alternatives: nil, profanity_filter: nil, phrases: nil
+ max_alternatives: nil, profanity_filter: nil, phrases: nil,
+ words: nil
  ensure_service!

  audio_obj = audio source, encoding: encoding, language: language,
@@ -449,7 +460,8 @@ module Google
  config = audio_config(
  encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
  language: audio_obj.language, max_alternatives: max_alternatives,
- profanity_filter: profanity_filter, phrases: phrases)
+ profanity_filter: profanity_filter, phrases: phrases,
+ words: words)

  grpc = service.recognize_async audio_obj.to_grpc, config
  Operation.from_grpc grpc
@@ -513,6 +525,10 @@ module Google
  # phrases "hints" so that the speech recognition is more likely to
  # recognize them. See [usage
  # limits](https://cloud.google.com/speech/limits#content). Optional.
+ # @param [Boolean] words When `true`, return a list of words with
+ # additional information about each word. Currently, the only
+ # additional information provided is the the start and end time
+ # offsets. See {Result#words}. Default is `false`.
  # @param [Boolean] utterance When `true`, the service will perform
  # continuous recognition (continuing to process audio even if the user
  # pauses speaking) until the client closes the output stream (gRPC
@@ -550,7 +566,7 @@ module Google
  #
  def stream encoding: nil, language: nil, sample_rate: nil,
  max_alternatives: nil, profanity_filter: nil, phrases: nil,
- utterance: nil, interim: nil
+ words: nil, utterance: nil, interim: nil
  ensure_service!

  grpc_req = V1::StreamingRecognizeRequest.new(
@@ -561,7 +577,7 @@ module Google
  sample_rate: sample_rate,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- phrases: phrases),
+ phrases: phrases, words: words),
  single_utterance: utterance,
  interim_results: interim
  }.delete_if { |_, v| v.nil? }
@@ -608,7 +624,7 @@ module Google

  def audio_config encoding: nil, language: nil, sample_rate: nil,
  max_alternatives: nil, profanity_filter: nil,
- phrases: nil
+ phrases: nil, words: nil
  contexts = nil
  contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
  language = String(language) unless language.nil?
@@ -618,7 +634,8 @@ module Google
  sample_rate_hertz: sample_rate,
  max_alternatives: max_alternatives,
  profanity_filter: profanity_filter,
- speech_contexts: contexts
+ speech_contexts: contexts,
+ enable_word_time_offsets: words
  }.delete_if { |_, v| v.nil? })
  end

@@ -14,6 +14,7 @@


  require "google/cloud/speech/v1"
+ require "google/cloud/speech/convert"

  module Google
  module Cloud
@@ -35,6 +36,10 @@ module Google
  # recognition is correct. This field is typically provided only for the
  # top hypothesis. A value of 0.0 is a sentinel value indicating
  # confidence was not set.
+ # @attr_reader [Array<Result::Word>] words A list of words with additional
+ # information about each word. Currently, the only additional
+ # information provided is the the start and end time offsets. Available
+ # when using the `words` argument in relevant methods.
  # @attr_reader [Array<Result::Alternative>] alternatives Additional
  # recognition hypotheses (up to the value specified in
  # `max_alternatives`). The server may return fewer than
@@ -56,13 +61,14 @@ module Google
  # result.confidence #=> 0.9826789498329163
  #
  class Result
- attr_reader :transcript, :confidence, :alternatives
+ attr_reader :transcript, :confidence, :words, :alternatives

  ##
  # @private Creates a new Results instance.
- def initialize transcript, confidence, alternatives = []
- @transcript = transcript
- @confidence = confidence
+ def initialize transcript, confidence, words = [], alternatives = []
+ @transcript = transcript
+ @confidence = confidence
+ @words = words
  @alternatives = alternatives
  end

@@ -71,10 +77,42 @@ module Google
  def self.from_grpc grpc
  head, *tail = *grpc.alternatives
  return nil if head.nil?
+ words = Array(head.words).map do |w|
+ Word.new w.word, Convert.duration_to_number(w.start_time),
+ Convert.duration_to_number(w.end_time)
+ end
  alternatives = tail.map do |alt|
  Alternative.new alt.transcript, alt.confidence
  end
- new head.transcript, head.confidence, alternatives
+ new head.transcript, head.confidence, words, alternatives
+ end
+
+ ##
+ # Word-specific information for recognized words. Currently, the only
+ # additional information provided is the the start and end time offsets.
+ # Available when using the `words` argument in relevant methods.
+ #
+ # @attr_reader [String] word The word corresponding to this set of
+ # information.
+ # @attr_reader [Numeric] start_time Time offset relative to the
+ # beginning of the audio, and corresponding to the start of the spoken
+ # word. This field is only set if `words` was specified. This is an
+ # experimental feature and the accuracy of the time offset can vary.
+ # @attr_reader [Numeric] end_time Time offset relative to the
+ # beginning of the audio, and corresponding to the end of the spoken
+ # word. This field is only set if `words` was specified. This is an
+ # experimental feature and the accuracy of the time offset can vary.
+ class Word
+ attr_reader :word, :start_time, :end_time
+ alias_method :to_str, :word
+
+ ##
+ # @private Creates a new Result::Word instance.
+ def initialize word, start_time, end_time
+ @word = word
+ @start_time = start_time
+ @end_time = end_time
+ end
  end

  ##
@@ -114,7 +152,7 @@ module Google
  ##
  # @private Creates a new Result::Alternative instance.
  def initialize transcript, confidence
- @transcript = transcript
+ @transcript = transcript
  @confidence = confidence
  end
  end
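To make the new `Result::Word` value object concrete, a small illustration; the constructor is `@private`, so in practice these objects come back from `Result#words`, and the values here are invented:

```ruby
require "google/cloud/speech"

word = Google::Cloud::Speech::Result::Word.new "Brooklyn", 1.0, 1.3

word.word       #=> "Brooklyn"
word.start_time #=> 1.0
word.end_time   #=> 1.3

# Because to_str is aliased to word, a Word converts implicitly to a String.
"How old is the " + word #=> "How old is the Brooklyn"
```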
@@ -1,4 +1,5 @@
  # Copyright 2017, Google Inc. All rights reserved.
+ #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
@@ -11,4 +12,71 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ module Google
+ module Cloud
+ # rubocop:disable LineLength
+
+ ##
+ # # Ruby Client for Google Cloud Speech API ([Alpha](https://github.com/GoogleCloudPlatform/google-cloud-ruby#versioning))
+ #
+ # [Google Cloud Speech API][Product Documentation]: Google Cloud Speech API.
+ # - [Product Documentation][]
+ #
+ # ## Quick Start
+ # In order to use this library, you first need to go through the following steps:
+ #
+ # 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
+ # 2. [Enable the Google Cloud Speech API.](https://console.cloud.google.com/apis/api/speech)
+ # 3. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
+ #
+ # ### Installation
+ # ```
+ # $ gem install google-cloud-speech
+ # ```
+ #
+ # ### Preview
+ # #### SpeechClient
+ # ```rb
+ # require "google/cloud/speech/v1"
+ #
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # language_code = "en-US"
+ # sample_rate_hertz = 44100
+ # encoding = :FLAC
+ # config = {
+ # language_code: language_code,
+ # sample_rate_hertz: sample_rate_hertz,
+ # encoding: encoding
+ # }
+ # uri = "gs://gapic-toolkit/hello.flac"
+ # audio = { uri: uri }
+ # response = speech_client.recognize(config, audio)
+ # ```
+ #
+ # ### Next Steps
+ # - Read the [Google Cloud Speech API Product documentation][Product Documentation] to learn more about the product and see How-to Guides.
+ # - View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/README.md) to see the full list of Cloud APIs that we cover.
+ #
+ # [Product Documentation]: https://cloud.google.com/speech
+ #
+ module Speech
+ # rubocop:enable LineLength
+
+ ##
+ # # Google Cloud Speech API Contents
+ #
+ # | Class | Description |
+ # | ----- | ----------- |
+ # | [SpeechClient][] | Google Cloud Speech API. |
+ # | [Data Types][] | Data types for Google::Cloud::Speech::V1 |
+ #
+ # [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/speechclient
+ # [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/datatypes
+ #
+ module V1
+ end
+ end
+ end
+ end
+
  require "google/cloud/speech/v1/speech_client"
@@ -36,6 +36,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
  optional :max_alternatives, :int32, 4
  optional :profanity_filter, :bool, 5
  repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
+ optional :enable_word_time_offsets, :bool, 8
  end
  add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
  value :ENCODING_UNSPECIFIED, 0
@@ -87,6 +88,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
  add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
  optional :transcript, :string, 1
  optional :confidence, :float, 2
+ repeated :words, :message, 3, "google.cloud.speech.v1.WordInfo"
+ end
+ add_message "google.cloud.speech.v1.WordInfo" do
+ optional :start_time, :message, 1, "google.protobuf.Duration"
+ optional :end_time, :message, 2, "google.protobuf.Duration"
+ optional :word, :string, 3
  end
  end

@@ -110,6 +117,7 @@ module Google
  StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
  SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
  SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
+ WordInfo = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.WordInfo").msgclass
  end
  end
  end
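A brief sketch of the newly registered `WordInfo` message class in use, assuming the V1 protos are loaded through the usual `google/cloud/speech/v1` entry point; the timings are invented:

```ruby
require "google/cloud/speech/v1"

info = Google::Cloud::Speech::V1::WordInfo.new(
  word: "bridge",
  start_time: Google::Protobuf::Duration.new(seconds: 2, nanos: 100_000_000),
  end_time:   Google::Protobuf::Duration.new(seconds: 2, nanos: 600_000_000)
)

info.word               #=> "bridge"
info.start_time.seconds #=> 2
info.start_time.nanos   #=> 100000000
```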
@@ -56,7 +56,7 @@ module Google
  # +audio_content+ data. The audio bytes must be encoded as specified in
  # +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
  # pure binary representation (not base64). See
- # {audio limits}[https://cloud.google.com/speech/limits#content].
+ # [audio limits](https://cloud.google.com/speech/limits#content).
  class StreamingRecognizeRequest; end

  # Provides information to the recognizer that specifies how to process the
@@ -101,9 +101,9 @@ module Google
  # @!attribute [rw] language_code
  # @return [String]
  # *Required* The language of the supplied audio as a
- # {BCP-47}[https://www.rfc-editor.org/rfc/bcp/bcp47.txt] language tag.
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  # Example: "en-US".
- # See {Language Support}[https://cloud.google.com/speech/docs/languages]
+ # See [Language Support](https://cloud.google.com/speech/docs/languages)
  # for a list of the currently supported language codes.
  # @!attribute [rw] max_alternatives
  # @return [Integer]
@@ -122,11 +122,17 @@ module Google
  # @!attribute [rw] speech_contexts
  # @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
  # *Optional* A means to provide context to assist the speech recognition.
+ # @!attribute [rw] enable_word_time_offsets
+ # @return [true, false]
+ # *Optional* If +true+, the top result includes a list of words and
+ # the start and end time offsets (timestamps) for those words. If
+ # +false+, no word-level time offset information is returned. The default is
+ # +false+.
  class RecognitionConfig
  # Audio encoding of the data sent in the audio message. All encodings support
- # only 1 channel (mono) audio. Only +FLAC+ includes a header that describes
- # the bytes of audio that follow the header. The other encodings are raw
- # audio bytes with no header.
+ # only 1 channel (mono) audio. Only +FLAC+ and +WAV+ include a header that
+ # describes the bytes of audio that follow the header. The other encodings
+ # are raw audio bytes with no header.
  #
  # For best results, the audio source should be captured and transmitted using
  # a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
@@ -134,13 +140,13 @@ module Google
  # this section, are used to capture or transmit the audio, particularly if
  # background noise is present.
  module AudioEncoding
- # Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
+ # Not specified. Will return result {Google::Rpc::Code::INVALID_ARGUMENT}.
  ENCODING_UNSPECIFIED = 0

  # Uncompressed 16-bit signed little-endian samples (Linear PCM).
  LINEAR16 = 1

- # {+FLAC+}[https://xiph.org/flac/documentation.html] (Free Lossless Audio
+ # [+FLAC+](https://xiph.org/flac/documentation.html) (Free Lossless Audio
  # Codec) is the recommended encoding because it is
  # lossless--therefore recognition is not compromised--and
  # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
@@ -158,17 +164,17 @@ module Google
  AMR_WB = 5

  # Opus encoded audio frames in Ogg container
- # ({OggOpus}[https://wiki.xiph.org/OggOpus]).
+ # ([OggOpus](https://wiki.xiph.org/OggOpus)).
  # +sample_rate_hertz+ must be 16000.
  OGG_OPUS = 6

  # Although the use of lossy encodings is not recommended, if a very low
  # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
- # Speex encoding. The {Speex}[https://speex.org/] encoding supported by
+ # Speex encoding. The [Speex](https://speex.org/) encoding supported by
  # Cloud Speech API has a header byte in each block, as in MIME type
  # +audio/x-speex-with-header-byte+.
  # It is a variant of the RTP Speex encoding defined in
- # {RFC 5574}[https://tools.ietf.org/html/rfc5574].
+ # [RFC 5574](https://tools.ietf.org/html/rfc5574).
  # The stream is a sequence of blocks, one block per RTP packet. Each block
  # starts with a byte containing the length of the block, in bytes, followed
  # by one or more frames of Speex data, padded to an integral number of
@@ -188,13 +194,13 @@ module Google
  # to improve the accuracy for specific words and phrases, for example, if
  # specific commands are typically spoken by the user. This can also be used
  # to add additional words to the vocabulary of the recognizer. See
- # {usage limits}[https://cloud.google.com/speech/limits#content].
+ # [usage limits](https://cloud.google.com/speech/limits#content).
  class SpeechContext; end

  # Contains audio data in the encoding specified in the +RecognitionConfig+.
  # Either +content+ or +uri+ must be supplied. Supplying both or neither
- # returns Google::Rpc::Code::INVALID_ARGUMENT. See
- # {audio limits}[https://cloud.google.com/speech/limits#content].
+ # returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
+ # [audio limits](https://cloud.google.com/speech/limits#content).
  # @!attribute [rw] content
  # @return [String]
  # The audio data bytes encoded as specified in
@@ -206,8 +212,8 @@ module Google
  # +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
  # supported, which must be specified in the following format:
  # +gs://bucket_name/object_name+ (other URI formats return
- # Google::Rpc::Code::INVALID_ARGUMENT). For more information, see
- # {Request URIs}[https://cloud.google.com/storage/docs/reference-uris].
+ # {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
+ # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
  class RecognitionAudio; end

  # The only message returned to the client by the +Recognize+ method. It
@@ -269,34 +275,32 @@ module Google
  # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
  # results { alternatives { transcript: " the question" } stability: 0.01 }
  #
- # 7. speech_event_type: END_OF_SINGLE_UTTERANCE
- #
- # 8. results { alternatives { transcript: " that is the question"
+ # 7. results { alternatives { transcript: " that is the question"
  # confidence: 0.98 }
  # alternatives { transcript: " that was the question" }
  # is_final: true }
  #
  # Notes:
  #
- # - Only two of the above responses #4 and #8 contain final results; they are
+ # * Only two of the above responses #4 and #7 contain final results; they are
  # indicated by +is_final: true+. Concatenating these together generates the
  # full transcript: "to be or not to be that is the question".
  #
- # - The others contain interim +results+. #3 and #6 contain two interim
+ # * The others contain interim +results+. #3 and #6 contain two interim
  # +results+: the first portion has a high stability and is less likely to
  # change; the second portion has a low stability and is very likely to
  # change. A UI designer might choose to show only high stability +results+.
  #
- # - The specific +stability+ and +confidence+ values shown above are only for
+ # * The specific +stability+ and +confidence+ values shown above are only for
  # illustrative purposes. Actual values may vary.
  #
- # - In each response, only one of these fields will be set:
- # +error+,
- # +speech_event_type+, or
- # one or more (repeated) +results+.
+ # * In each response, only one of these fields will be set:
+ # +error+,
+ # +speech_event_type+, or
+ # one or more (repeated) +results+.
  # @!attribute [rw] error
  # @return [Google::Rpc::Status]
- # *Output-only* If set, returns a Google::Rpc::Status message that
+ # *Output-only* If set, returns a {Google::Rpc::Status} message that
  # specifies the error for the operation.
  # @!attribute [rw] results
  # @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
@@ -351,6 +355,8 @@ module Google
  # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
  # *Output-only* May contain one or more recognition hypotheses (up to the
  # maximum specified in +max_alternatives+).
+ # These alternatives are ordered in terms of accuracy, with the top (first)
+ # alternative being the most probable, as ranked by the recognizer.
  class SpeechRecognitionResult; end

  # Alternative hypotheses (a.k.a. n-best list).
@@ -363,10 +369,36 @@ module Google
  # indicates an estimated greater likelihood that the recognized words are
  # correct. This field is typically provided only for the top hypothesis, and
  # only for +is_final=true+ results. Clients should not rely on the
- # +confidence+ field as it is not guaranteed to be accurate, or even set, in
- # any of the results.
+ # +confidence+ field as it is not guaranteed to be accurate or consistent.
  # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
+ # @!attribute [rw] words
+ # @return [Array<Google::Cloud::Speech::V1::WordInfo>]
+ # *Output-only* A list of word-specific information for each recognized word.
  class SpeechRecognitionAlternative; end
+
+ # Word-specific information for recognized words. Word information is only
+ # included in the response when certain request parameters are set, such
+ # as +enable_word_time_offsets+.
+ # @!attribute [rw] start_time
+ # @return [Google::Protobuf::Duration]
+ # *Output-only* Time offset relative to the beginning of the audio,
+ # and corresponding to the start of the spoken word.
+ # This field is only set if +enable_word_time_offsets=true+ and only
+ # in the top hypothesis.
+ # This is an experimental feature and the accuracy of the time offset can
+ # vary.
+ # @!attribute [rw] end_time
+ # @return [Google::Protobuf::Duration]
+ # *Output-only* Time offset relative to the beginning of the audio,
+ # and corresponding to the end of the spoken word.
+ # This field is only set if +enable_word_time_offsets=true+ and only
+ # in the top hypothesis.
+ # This is an experimental feature and the accuracy of the time offset can
+ # vary.
+ # @!attribute [rw] word
+ # @return [String]
+ # *Output-only* The word corresponding to this set of information.
+ class WordInfo; end
  end
  end
  end
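Putting the new config field and +WordInfo+ message together, a hedged end-to-end sketch against the low-level V1 client; the bucket URI is the same placeholder used in the examples above:

```ruby
require "google/cloud/speech/v1"

speech_client = Google::Cloud::Speech::V1::SpeechClient.new
config = {
  encoding: :FLAC,
  sample_rate_hertz: 44100,
  language_code: "en-US",
  enable_word_time_offsets: true
}
audio = { uri: "gs://bucket_name/file_name.flac" }

response = speech_client.recognize config, audio
response.results.each do |result|
  result.alternatives.first.words.each do |info|
    start = info.start_time.seconds + info.start_time.nanos / 1e9
    puts "#{info.word} starts at #{start}s"
  end
end
```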
@@ -77,7 +77,7 @@ module Google
  # If the embedded message type is well-known and has a custom JSON
  # representation, that representation will be embedded adding a field
  # +value+ which holds the custom JSON in addition to the +@type+
- # field. Example (for message Google::Protobuf::Duration):
+ # field. Example (for message {Google::Protobuf::Duration}):
  #
  # {
  # "@type": "type.googleapis.com/google.protobuf.Duration",
@@ -96,7 +96,7 @@ module Google
  # qualified name of the type (as in +path/google.protobuf.Duration+).
  # The name should be in a canonical form (e.g., leading "." is
  # not accepted).
- # * An HTTP GET on the URL must yield a Google::Protobuf::Type
+ # * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
  # value in binary format, or produce an error.
  # * Applications are allowed to cache lookup results based on the
  # URL, or have them precompiled into a binary to avoid any
@@ -0,0 +1,77 @@
+ # Copyright 2017, Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ module Google
+ module Protobuf
+ # A Duration represents a signed, fixed-length span of time represented
+ # as a count of seconds and fractions of seconds at nanosecond
+ # resolution. It is independent of any calendar and concepts like "day"
+ # or "month". It is related to Timestamp in that the difference between
+ # two Timestamp values is a Duration and it can be added or subtracted
+ # from a Timestamp. Range is approximately +-10,000 years.
+ #
+ # Example 1: Compute Duration from two Timestamps in pseudo code.
+ #
+ # Timestamp start = ...;
+ # Timestamp end = ...;
+ # Duration duration = ...;
+ #
+ # duration.seconds = end.seconds - start.seconds;
+ # duration.nanos = end.nanos - start.nanos;
+ #
+ # if (duration.seconds < 0 && duration.nanos > 0) {
+ # duration.seconds += 1;
+ # duration.nanos -= 1000000000;
+ # } else if (durations.seconds > 0 && duration.nanos < 0) {
+ # duration.seconds -= 1;
+ # duration.nanos += 1000000000;
+ # }
+ #
+ # Example 2: Compute Timestamp from Timestamp + Duration in pseudo code.
+ #
+ # Timestamp start = ...;
+ # Duration duration = ...;
+ # Timestamp end = ...;
+ #
+ # end.seconds = start.seconds + duration.seconds;
+ # end.nanos = start.nanos + duration.nanos;
+ #
+ # if (end.nanos < 0) {
+ # end.seconds -= 1;
+ # end.nanos += 1000000000;
+ # } else if (end.nanos >= 1000000000) {
+ # end.seconds += 1;
+ # end.nanos -= 1000000000;
+ # }
+ #
+ # Example 3: Compute Duration from datetime.timedelta in Python.
+ #
+ # td = datetime.timedelta(days=3, minutes=10)
+ # duration = Duration()
+ # duration.FromTimedelta(td)
+ # @!attribute [rw] seconds
+ # @return [Integer]
+ # Signed seconds of the span of time. Must be from -315,576,000,000
+ # to +315,576,000,000 inclusive.
+ # @!attribute [rw] nanos
+ # @return [Integer]
+ # Signed fractions of a second at nanosecond resolution of the span
+ # of time. Durations less than one second are represented with a 0
+ # +seconds+ field and a positive or negative +nanos+ field. For durations
+ # of one second or more, a non-zero value for the +nanos+ field must be
+ # of the same sign as the +seconds+ field. Must be from -999,999,999
+ # to +999,999,999 inclusive.
+ class Duration; end
+ end
+ end
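The normalization rules described in the comment above can also be expressed compactly in Ruby; this is purely an illustrative helper, not part of the gem:

```ruby
# Split a raw (seconds, nanos) pair into a normalized Duration-style pair
# where both parts carry the same sign and |nanos| < 1_000_000_000.
def normalize_duration seconds, nanos
  total = seconds * 1_000_000_000 + nanos
  sign  = total.negative? ? -1 : 1
  secs, ns = total.abs.divmod 1_000_000_000
  [sign * secs, sign * ns]
end

normalize_duration 1, -250_000_000  #=> [0, 750000000]
normalize_duration 0, 1_500_000_000 #=> [1, 500000000]
```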
@@ -16,16 +16,16 @@ module Google
  module Rpc
  # The +Status+ type defines a logical error model that is suitable for different
  # programming environments, including REST APIs and RPC APIs. It is used by
- # {gRPC}[https://github.com/grpc]. The error model is designed to be:
+ # [gRPC](https://github.com/grpc). The error model is designed to be:
  #
- # - Simple to use and understand for most users
- # - Flexible enough to meet unexpected needs
+ # * Simple to use and understand for most users
+ # * Flexible enough to meet unexpected needs
  #
  # = Overview
  #
  # The +Status+ message contains three pieces of data: error code, error message,
  # and error details. The error code should be an enum value of
- # Google::Rpc::Code, but it may accept additional error codes if needed. The
+ # {Google::Rpc::Code}, but it may accept additional error codes if needed. The
  # error message should be a developer-facing English message that helps
  # developers *understand* and *resolve* the error. If a localized user-facing
  # error message is needed, put the localized message in the error details or
@@ -49,31 +49,31 @@ module Google
  #
  # Example uses of this error model include:
  #
- # - Partial errors. If a service needs to return partial errors to the client,
- # it may embed the +Status+ in the normal response to indicate the partial
- # errors.
+ # * Partial errors. If a service needs to return partial errors to the client,
+ # it may embed the +Status+ in the normal response to indicate the partial
+ # errors.
  #
- # - Workflow errors. A typical workflow has multiple steps. Each step may
- # have a +Status+ message for error reporting purpose.
+ # * Workflow errors. A typical workflow has multiple steps. Each step may
+ # have a +Status+ message for error reporting purpose.
  #
- # - Batch operations. If a client uses batch request and batch response, the
- # +Status+ message should be used directly inside batch response, one for
- # each error sub-response.
+ # * Batch operations. If a client uses batch request and batch response, the
+ # +Status+ message should be used directly inside batch response, one for
+ # each error sub-response.
  #
- # - Asynchronous operations. If an API call embeds asynchronous operation
- # results in its response, the status of those operations should be
- # represented directly using the +Status+ message.
+ # * Asynchronous operations. If an API call embeds asynchronous operation
+ # results in its response, the status of those operations should be
+ # represented directly using the +Status+ message.
  #
- # - Logging. If some API errors are stored in logs, the message +Status+ could
- # be used directly after any stripping needed for security/privacy reasons.
+ # * Logging. If some API errors are stored in logs, the message +Status+ could
+ # be used directly after any stripping needed for security/privacy reasons.
  # @!attribute [rw] code
  # @return [Integer]
- # The status code, which should be an enum value of Google::Rpc::Code.
+ # The status code, which should be an enum value of {Google::Rpc::Code}.
  # @!attribute [rw] message
  # @return [String]
  # A developer-facing error message, which should be in English. Any
  # user-facing error message should be localized and sent in the
- # Google::Rpc::Status#details field, or localized by the client.
+ # {Google::Rpc::Status#details} field, or localized by the client.
  # @!attribute [rw] details
  # @return [Array<Google::Protobuf::Any>]
  # A list of messages that carry the error details. There will be a
@@ -165,11 +165,15 @@ module Google
  # Performs synchronous speech recognition: receive results after all audio
  # has been sent and processed.
  #
- # @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+ # @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
  # *Required* Provides information to the recognizer that specifies how to
  # process the request.
- # @param audio [Google::Cloud::Speech::V1::RecognitionAudio]
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+ # can also be provided.
+ # @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
  # *Required* The audio data to be recognized.
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+ # can also be provided.
  # @param options [Google::Gax::CallOptions]
  # Overrides the default settings for this call, e.g, timeout,
  # retries, etc.
@@ -178,32 +182,28 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # AudioEncoding = Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding
- # RecognitionAudio = Google::Cloud::Speech::V1::RecognitionAudio
- # RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- #
- # speech_client = SpeechClient.new
- # encoding = AudioEncoding::FLAC
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # encoding = :FLAC
  # sample_rate_hertz = 44100
  # language_code = "en-US"
- # config = RecognitionConfig.new
- # config.encoding = encoding
- # config.sample_rate_hertz = sample_rate_hertz
- # config.language_code = language_code
+ # config = {
+ # encoding: encoding,
+ # sample_rate_hertz: sample_rate_hertz,
+ # language_code: language_code
+ # }
  # uri = "gs://bucket_name/file_name.flac"
- # audio = RecognitionAudio.new
- # audio.uri = uri
+ # audio = { uri: uri }
  # response = speech_client.recognize(config, audio)

  def recognize \
  config,
  audio,
  options: nil
- req = Google::Cloud::Speech::V1::RecognizeRequest.new({
+ req = {
  config: config,
  audio: audio
- }.delete_if { |_, v| v.nil? })
+ }.delete_if { |_, v| v.nil? }
+ req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::RecognizeRequest)
  @recognize.call(req, options)
  end

@@ -212,11 +212,15 @@ module Google
  # +Operation.error+ or an +Operation.response+ which contains
  # a +LongRunningRecognizeResponse+ message.
  #
- # @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+ # @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
  # *Required* Provides information to the recognizer that specifies how to
  # process the request.
- # @param audio [Google::Cloud::Speech::V1::RecognitionAudio]
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+ # can also be provided.
+ # @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
  # *Required* The audio data to be recognized.
+ # A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+ # can also be provided.
  # @param options [Google::Gax::CallOptions]
  # Overrides the default settings for this call, e.g, timeout,
  # retries, etc.
@@ -225,22 +229,17 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # AudioEncoding = Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding
- # RecognitionAudio = Google::Cloud::Speech::V1::RecognitionAudio
- # RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- #
- # speech_client = SpeechClient.new
- # encoding = AudioEncoding::FLAC
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # encoding = :FLAC
  # sample_rate_hertz = 44100
  # language_code = "en-US"
- # config = RecognitionConfig.new
- # config.encoding = encoding
- # config.sample_rate_hertz = sample_rate_hertz
- # config.language_code = language_code
+ # config = {
+ # encoding: encoding,
+ # sample_rate_hertz: sample_rate_hertz,
+ # language_code: language_code
+ # }
  # uri = "gs://bucket_name/file_name.flac"
- # audio = RecognitionAudio.new
- # audio.uri = uri
+ # audio = { uri: uri }
  #
  # # Register a callback during the method call.
  # operation = speech_client.long_running_recognize(config, audio) do |op|
@@ -273,10 +272,11 @@ module Google
  config,
  audio,
  options: nil
- req = Google::Cloud::Speech::V1::LongRunningRecognizeRequest.new({
+ req = {
  config: config,
  audio: audio
- }.delete_if { |_, v| v.nil? })
+ }.delete_if { |_, v| v.nil? }
+ req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::LongRunningRecognizeRequest)
  operation = Google::Gax::Operation.new(
  @long_running_recognize.call(req, options),
  @operations_client,
@@ -309,11 +309,8 @@ module Google
  # @example
  # require "google/cloud/speech/v1"
  #
- # SpeechClient = Google::Cloud::Speech::V1::SpeechClient
- # StreamingRecognizeRequest = Google::Cloud::Speech::V1::StreamingRecognizeRequest
- #
- # speech_client = SpeechClient.new
- # request = StreamingRecognizeRequest.new
+ # speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+ # request = {}
  # requests = [request]
  # speech_client.streaming_recognize(requests).each do |element|
  # # Process element.
@@ -16,7 +16,7 @@
  module Google
  module Cloud
  module Speech
- VERSION = "0.25.0"
+ VERSION = "0.26.0"
  end
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-speech
  version: !ruby/object:Gem::Version
- version: 0.25.0
+ version: 0.26.0
  platform: ruby
  authors:
  - Mike Moore
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-07-11 00:00:00.000000000 Z
+ date: 2017-07-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: google-cloud-core
@@ -179,6 +179,7 @@ files:
  - lib/google-cloud-speech.rb
  - lib/google/cloud/speech.rb
  - lib/google/cloud/speech/audio.rb
+ - lib/google/cloud/speech/convert.rb
  - lib/google/cloud/speech/credentials.rb
  - lib/google/cloud/speech/operation.rb
  - lib/google/cloud/speech/project.rb
@@ -190,6 +191,7 @@ files:
  - lib/google/cloud/speech/v1/cloud_speech_services_pb.rb
  - lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
  - lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
+ - lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
  - lib/google/cloud/speech/v1/doc/google/rpc/status.rb
  - lib/google/cloud/speech/v1/speech_client.rb
  - lib/google/cloud/speech/v1/speech_client_config.json