google-cloud-speech 0.25.0 → 0.26.0
- checksums.yaml +4 -4
- data/lib/google/cloud/speech/audio.rb +13 -4
- data/lib/google/cloud/speech/convert.rb +46 -0
- data/lib/google/cloud/speech/project.rb +25 -8
- data/lib/google/cloud/speech/result.rb +44 -6
- data/lib/google/cloud/speech/v1.rb +68 -0
- data/lib/google/cloud/speech/v1/cloud_speech_pb.rb +8 -0
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +61 -29
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +2 -2
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +77 -0
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +19 -19
- data/lib/google/cloud/speech/v1/speech_client.rb +36 -39
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-metadata.gz:
-data.tar.gz:
+metadata.gz: 454b1310e5e39c2b4ac676c26b6aea8a89f452c5
+data.tar.gz: 12fa7c7129f20bdf92c5e61c3e1e294307937f1f
 SHA512:
-metadata.gz:
-data.tar.gz:
+metadata.gz: dca3bf30a3edad90e5dec095183a4a721491b2cef886ecc82fa4a83afe6fdc3c5e1387b336f9d0a96286b6605dc58cead675242e8d04338b2c44c17896c9b3fd
+data.tar.gz: 6ce650dbbeb8cccac3a3e63c290b191ff3c92436d65cb1a1887d6e3480ca4707dfd326017186f3da278f913e70057258d03469df503f9fcd0ce6c29a944e4fb5
data/lib/google/cloud/speech/audio.rb
CHANGED
@@ -191,6 +191,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Array<Result>] The transcribed text of audio recognized.
 #
@@ -209,14 +213,15 @@ module Google
 # result.transcript #=> "how old is the Brooklyn Bridge"
 # result.confidence #=> 0.9826789498329163
 #
-def recognize max_alternatives: nil, profanity_filter: nil,
+def recognize max_alternatives: nil, profanity_filter: nil,
+phrases: nil, words: nil
 ensure_speech!

 speech.recognize self, encoding: encoding, sample_rate: sample_rate,
 language: language,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-phrases: phrases
+phrases: phrases, words: words
 end

 ##
@@ -239,6 +244,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Operation] A resource represents the long-running,
 # asynchronous processing of a speech-recognition operation.
@@ -260,7 +269,7 @@ module Google
 # results = op.results
 #
 def process max_alternatives: nil, profanity_filter: nil,
-phrases: nil
+phrases: nil, words: nil
 ensure_speech!

 speech.process self, encoding: encoding,
@@ -268,7 +277,7 @@ module Google
 language: language,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-phrases: phrases
+phrases: phrases, words: words
 end
 alias_method :long_running_recognize, :process
 alias_method :recognize_job, :process
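Taken together, the audio.rb changes add an opt-in `words:` flag to Audio#recognize and Audio#process that surfaces per-word time offsets on the returned results. A minimal, illustrative sketch (assuming `audio` was built beforehand via Project#audio, as in the gem's own examples):

    # `audio` is a Google::Cloud::Speech::Audio built elsewhere, e.g. project.audio(...).
    results = audio.recognize words: true

    results.first.words.each do |word|
      # Each Result::Word carries the word plus start/end offsets in seconds.
      puts "#{word.word}: #{word.start_time} -> #{word.end_time}"
    end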
data/lib/google/cloud/speech/convert.rb
ADDED
@@ -0,0 +1,46 @@
+# Copyright 2017 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+require "google/protobuf/duration_pb"
+
+module Google
+module Cloud
+module Speech
+##
+# @private Helper module for converting Speech values.
+module Convert
+module ClassMethods
+def number_to_duration number
+return nil if number.nil?
+
+Google::Protobuf::Duration.new \
+seconds: number.to_i,
+nanos: (number.remainder(1) * 1000000000).round
+end
+
+def duration_to_number duration
+return nil if duration.nil?
+
+return duration.seconds if duration.nanos == 0
+
+duration.seconds + (duration.nanos / 1000000000.0)
+end
+end
+
+extend ClassMethods
+end
+end
+end
+end
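The new Convert helper (marked @private, so shown here only to illustrate the arithmetic) maps protobuf Duration values to plain Ruby numbers and back; the values below are chosen for illustration:

    require "google/cloud/speech/convert"

    # 1.5 seconds becomes Duration(seconds: 1, nanos: 500_000_000)...
    duration = Google::Cloud::Speech::Convert.number_to_duration 1.5
    duration.seconds #=> 1
    duration.nanos   #=> 500000000

    # ...and converts back to the Numeric used for Result::Word offsets.
    Google::Cloud::Speech::Convert.duration_to_number duration #=> 1.5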
data/lib/google/cloud/speech/project.rb
CHANGED
@@ -266,6 +266,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Array<Result>] The transcribed text of audio recognized.
 #
@@ -308,7 +312,8 @@ module Google
 # max_alternatives: 10
 #
 def recognize source, encoding: nil, language: nil, sample_rate: nil,
-max_alternatives: nil, profanity_filter: nil,
+max_alternatives: nil, profanity_filter: nil,
+phrases: nil, words: nil
 ensure_service!

 audio_obj = audio source, encoding: encoding, language: language,
@@ -317,7 +322,8 @@ module Google
 config = audio_config(
 encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
 language: audio_obj.language, max_alternatives: max_alternatives,
-profanity_filter: profanity_filter, phrases: phrases
+profanity_filter: profanity_filter, phrases: phrases,
+words: words)

 grpc = service.recognize_sync audio_obj.to_grpc, config
 grpc.results.map do |result_grpc|
@@ -388,6 +394,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 #
 # @return [Operation] A resource represents the long-running,
 # asynchronous processing of a speech-recognition operation.
@@ -440,7 +450,8 @@ module Google
 # op.reload!
 #
 def process source, encoding: nil, sample_rate: nil, language: nil,
-max_alternatives: nil, profanity_filter: nil, phrases: nil
+max_alternatives: nil, profanity_filter: nil, phrases: nil,
+words: nil
 ensure_service!

 audio_obj = audio source, encoding: encoding, language: language,
@@ -449,7 +460,8 @@ module Google
 config = audio_config(
 encoding: audio_obj.encoding, sample_rate: audio_obj.sample_rate,
 language: audio_obj.language, max_alternatives: max_alternatives,
-profanity_filter: profanity_filter, phrases: phrases
+profanity_filter: profanity_filter, phrases: phrases,
+words: words)

 grpc = service.recognize_async audio_obj.to_grpc, config
 Operation.from_grpc grpc
@@ -513,6 +525,10 @@ module Google
 # phrases "hints" so that the speech recognition is more likely to
 # recognize them. See [usage
 # limits](https://cloud.google.com/speech/limits#content). Optional.
+# @param [Boolean] words When `true`, return a list of words with
+# additional information about each word. Currently, the only
+# additional information provided is the the start and end time
+# offsets. See {Result#words}. Default is `false`.
 # @param [Boolean] utterance When `true`, the service will perform
 # continuous recognition (continuing to process audio even if the user
 # pauses speaking) until the client closes the output stream (gRPC
@@ -550,7 +566,7 @@ module Google
 #
 def stream encoding: nil, language: nil, sample_rate: nil,
 max_alternatives: nil, profanity_filter: nil, phrases: nil,
-utterance: nil, interim: nil
+words: nil, utterance: nil, interim: nil
 ensure_service!

 grpc_req = V1::StreamingRecognizeRequest.new(
@@ -561,7 +577,7 @@ module Google
 sample_rate: sample_rate,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-phrases: phrases),
+phrases: phrases, words: words),
 single_utterance: utterance,
 interim_results: interim
 }.delete_if { |_, v| v.nil? }
@@ -608,7 +624,7 @@ module Google

 def audio_config encoding: nil, language: nil, sample_rate: nil,
 max_alternatives: nil, profanity_filter: nil,
-phrases: nil
+phrases: nil, words: nil
 contexts = nil
 contexts = [V1::SpeechContext.new(phrases: phrases)] if phrases
 language = String(language) unless language.nil?
@@ -618,7 +634,8 @@ module Google
 sample_rate_hertz: sample_rate,
 max_alternatives: max_alternatives,
 profanity_filter: profanity_filter,
-speech_contexts: contexts
+speech_contexts: contexts,
+enable_word_time_offsets: words
 }.delete_if { |_, v| v.nil? })
 end

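Inside Project#audio_config the `words:` flag is simply forwarded to the generated V1 config as `enable_word_time_offsets`. Roughly the message it assembles for `words: true` (the other field values here are placeholders):

    require "google/cloud/speech/v1"

    # nil entries are dropped by the delete_if before the message is built.
    config = Google::Cloud::Speech::V1::RecognitionConfig.new(
      encoding:                 :LINEAR16,
      sample_rate_hertz:        16000,
      language_code:            "en-US",
      enable_word_time_offsets: true
    )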
data/lib/google/cloud/speech/result.rb
CHANGED
@@ -14,6 +14,7 @@


 require "google/cloud/speech/v1"
+require "google/cloud/speech/convert"

 module Google
 module Cloud
@@ -35,6 +36,10 @@ module Google
 # recognition is correct. This field is typically provided only for the
 # top hypothesis. A value of 0.0 is a sentinel value indicating
 # confidence was not set.
+# @attr_reader [Array<Result::Word>] words A list of words with additional
+# information about each word. Currently, the only additional
+# information provided is the the start and end time offsets. Available
+# when using the `words` argument in relevant methods.
 # @attr_reader [Array<Result::Alternative>] alternatives Additional
 # recognition hypotheses (up to the value specified in
 # `max_alternatives`). The server may return fewer than
@@ -56,13 +61,14 @@ module Google
 # result.confidence #=> 0.9826789498329163
 #
 class Result
-attr_reader :transcript, :confidence, :alternatives
+attr_reader :transcript, :confidence, :words, :alternatives

 ##
 # @private Creates a new Results instance.
-def initialize transcript, confidence, alternatives = []
-@transcript
-@confidence
+def initialize transcript, confidence, words = [], alternatives = []
+@transcript = transcript
+@confidence = confidence
+@words = words
 @alternatives = alternatives
 end

@@ -71,10 +77,42 @@ module Google
 def self.from_grpc grpc
 head, *tail = *grpc.alternatives
 return nil if head.nil?
+words = Array(head.words).map do |w|
+Word.new w.word, Convert.duration_to_number(w.start_time),
+Convert.duration_to_number(w.end_time)
+end
 alternatives = tail.map do |alt|
 Alternative.new alt.transcript, alt.confidence
 end
-new head.transcript, head.confidence, alternatives
+new head.transcript, head.confidence, words, alternatives
+end
+
+##
+# Word-specific information for recognized words. Currently, the only
+# additional information provided is the the start and end time offsets.
+# Available when using the `words` argument in relevant methods.
+#
+# @attr_reader [String] word The word corresponding to this set of
+# information.
+# @attr_reader [Numeric] start_time Time offset relative to the
+# beginning of the audio, and corresponding to the start of the spoken
+# word. This field is only set if `words` was specified. This is an
+# experimental feature and the accuracy of the time offset can vary.
+# @attr_reader [Numeric] end_time Time offset relative to the
+# beginning of the audio, and corresponding to the end of the spoken
+# word. This field is only set if `words` was specified. This is an
+# experimental feature and the accuracy of the time offset can vary.
+class Word
+attr_reader :word, :start_time, :end_time
+alias_method :to_str, :word
+
+##
+# @private Creates a new Result::Word instance.
+def initialize word, start_time, end_time
+@word = word
+@start_time = start_time
+@end_time = end_time
+end
 end

 ##
@@ -114,7 +152,7 @@ module Google
 ##
 # @private Creates a new Result::Alternative instance.
 def initialize transcript, confidence
-@transcript
+@transcript = transcript
 @confidence = confidence
 end
 end
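Result#words and the new Result::Word value object are straightforward to consume; because Word aliases to_str to the underlying word, it also converts implicitly wherever a String is required. A small sketch against a hypothetical `result`:

    # `result` is a Google::Cloud::Speech::Result obtained with words: true.
    result.words.each do |word|
      puts format("%-12s %.2fs - %.2fs", word.word, word.start_time, word.end_time)
    end

    # to_str lets a Word stand in for a String, e.g. in String#+.
    label = "First word: " + result.words.first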
data/lib/google/cloud/speech/v1.rb
CHANGED
@@ -1,4 +1,5 @@
 # Copyright 2017, Google Inc. All rights reserved.
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,4 +12,71 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+module Google
+module Cloud
+# rubocop:disable LineLength
+
+##
+# # Ruby Client for Google Cloud Speech API ([Alpha](https://github.com/GoogleCloudPlatform/google-cloud-ruby#versioning))
+#
+# [Google Cloud Speech API][Product Documentation]: Google Cloud Speech API.
+# - [Product Documentation][]
+#
+# ## Quick Start
+# In order to use this library, you first need to go through the following steps:
+#
+# 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
+# 2. [Enable the Google Cloud Speech API.](https://console.cloud.google.com/apis/api/speech)
+# 3. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
+#
+# ### Installation
+# ```
+# $ gem install google-cloud-speech
+# ```
+#
+# ### Preview
+# #### SpeechClient
+# ```rb
+# require "google/cloud/speech/v1"
+#
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# language_code = "en-US"
+# sample_rate_hertz = 44100
+# encoding = :FLAC
+# config = {
+# language_code: language_code,
+# sample_rate_hertz: sample_rate_hertz,
+# encoding: encoding
+# }
+# uri = "gs://gapic-toolkit/hello.flac"
+# audio = { uri: uri }
+# response = speech_client.recognize(config, audio)
+# ```
+#
+# ### Next Steps
+# - Read the [Google Cloud Speech API Product documentation][Product Documentation] to learn more about the product and see How-to Guides.
+# - View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/README.md) to see the full list of Cloud APIs that we cover.
+#
+# [Product Documentation]: https://cloud.google.com/speech
+#
+module Speech
+# rubocop:enable LineLength
+
+##
+# # Google Cloud Speech API Contents
+#
+# | Class | Description |
+# | ----- | ----------- |
+# | [SpeechClient][] | Google Cloud Speech API. |
+# | [Data Types][] | Data types for Google::Cloud::Speech::V1 |
+#
+# [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/speechclient
+# [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1/v1/datatypes
+#
+module V1
+end
+end
+end
+end
+
 require "google/cloud/speech/v1/speech_client"
data/lib/google/cloud/speech/v1/cloud_speech_pb.rb
CHANGED
@@ -36,6 +36,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
 optional :max_alternatives, :int32, 4
 optional :profanity_filter, :bool, 5
 repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
+optional :enable_word_time_offsets, :bool, 8
 end
 add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
 value :ENCODING_UNSPECIFIED, 0
@@ -87,6 +88,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
 add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
 optional :transcript, :string, 1
 optional :confidence, :float, 2
+repeated :words, :message, 3, "google.cloud.speech.v1.WordInfo"
+end
+add_message "google.cloud.speech.v1.WordInfo" do
+optional :start_time, :message, 1, "google.protobuf.Duration"
+optional :end_time, :message, 2, "google.protobuf.Duration"
+optional :word, :string, 3
 end
 end

@@ -110,6 +117,7 @@ module Google
 StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
 SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
 SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
+WordInfo = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.WordInfo").msgclass
 end
 end
 end
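With the regenerated descriptors, WordInfo is an ordinary protobuf message whose offsets are google.protobuf.Duration values, which is exactly what Convert#duration_to_number unwraps. A hand-built, purely illustrative instance:

    require "google/protobuf/duration_pb"
    require "google/cloud/speech/v1/cloud_speech_pb"

    word_info = Google::Cloud::Speech::V1::WordInfo.new(
      word:       "bridge",
      start_time: Google::Protobuf::Duration.new(seconds: 1, nanos: 200_000_000),
      end_time:   Google::Protobuf::Duration.new(seconds: 1, nanos: 900_000_000)
    )
    word_info.start_time.seconds #=> 1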
data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
CHANGED
@@ -56,7 +56,7 @@ module Google
 # +audio_content+ data. The audio bytes must be encoded as specified in
 # +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
 # pure binary representation (not base64). See
-#
+# [audio limits](https://cloud.google.com/speech/limits#content).
 class StreamingRecognizeRequest; end

 # Provides information to the recognizer that specifies how to process the
@@ -101,9 +101,9 @@ module Google
 # @!attribute [rw] language_code
 # @return [String]
 # *Required* The language of the supplied audio as a
-#
+# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
 # Example: "en-US".
-# See
+# See [Language Support](https://cloud.google.com/speech/docs/languages)
 # for a list of the currently supported language codes.
 # @!attribute [rw] max_alternatives
 # @return [Integer]
@@ -122,11 +122,17 @@ module Google
 # @!attribute [rw] speech_contexts
 # @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
 # *Optional* A means to provide context to assist the speech recognition.
+# @!attribute [rw] enable_word_time_offsets
+# @return [true, false]
+# *Optional* If +true+, the top result includes a list of words and
+# the start and end time offsets (timestamps) for those words. If
+# +false+, no word-level time offset information is returned. The default is
+# +false+.
 class RecognitionConfig
 # Audio encoding of the data sent in the audio message. All encodings support
-# only 1 channel (mono) audio. Only +FLAC+
-# the bytes of audio that follow the header. The other encodings
-# audio bytes with no header.
+# only 1 channel (mono) audio. Only +FLAC+ and +WAV+ include a header that
+# describes the bytes of audio that follow the header. The other encodings
+# are raw audio bytes with no header.
 #
 # For best results, the audio source should be captured and transmitted using
 # a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
@@ -134,13 +140,13 @@ module Google
 # this section, are used to capture or transmit the audio, particularly if
 # background noise is present.
 module AudioEncoding
-# Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
+# Not specified. Will return result {Google::Rpc::Code::INVALID_ARGUMENT}.
 ENCODING_UNSPECIFIED = 0

 # Uncompressed 16-bit signed little-endian samples (Linear PCM).
 LINEAR16 = 1

-#
+# [+FLAC+](https://xiph.org/flac/documentation.html) (Free Lossless Audio
 # Codec) is the recommended encoding because it is
 # lossless--therefore recognition is not compromised--and
 # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
@@ -158,17 +164,17 @@ module Google
 AMR_WB = 5

 # Opus encoded audio frames in Ogg container
-# (
+# ([OggOpus](https://wiki.xiph.org/OggOpus)).
 # +sample_rate_hertz+ must be 16000.
 OGG_OPUS = 6

 # Although the use of lossy encodings is not recommended, if a very low
 # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
-# Speex encoding. The
+# Speex encoding. The [Speex](https://speex.org/) encoding supported by
 # Cloud Speech API has a header byte in each block, as in MIME type
 # +audio/x-speex-with-header-byte+.
 # It is a variant of the RTP Speex encoding defined in
-#
+# [RFC 5574](https://tools.ietf.org/html/rfc5574).
 # The stream is a sequence of blocks, one block per RTP packet. Each block
 # starts with a byte containing the length of the block, in bytes, followed
 # by one or more frames of Speex data, padded to an integral number of
@@ -188,13 +194,13 @@ module Google
 # to improve the accuracy for specific words and phrases, for example, if
 # specific commands are typically spoken by the user. This can also be used
 # to add additional words to the vocabulary of the recognizer. See
-#
+# [usage limits](https://cloud.google.com/speech/limits#content).
 class SpeechContext; end

 # Contains audio data in the encoding specified in the +RecognitionConfig+.
 # Either +content+ or +uri+ must be supplied. Supplying both or neither
-# returns Google::Rpc::Code::INVALID_ARGUMENT. See
-#
+# returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
+# [audio limits](https://cloud.google.com/speech/limits#content).
 # @!attribute [rw] content
 # @return [String]
 # The audio data bytes encoded as specified in
@@ -206,8 +212,8 @@ module Google
 # +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
 # supported, which must be specified in the following format:
 # +gs://bucket_name/object_name+ (other URI formats return
-# Google::Rpc::Code::INVALID_ARGUMENT). For more information, see
-#
+# {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
+# [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
 class RecognitionAudio; end

 # The only message returned to the client by the +Recognize+ method. It
@@ -269,34 +275,32 @@ module Google
 # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
 # results { alternatives { transcript: " the question" } stability: 0.01 }
 #
-# 7.
-#
-# 8. results { alternatives { transcript: " that is the question"
+# 7. results { alternatives { transcript: " that is the question"
 # confidence: 0.98 }
 # alternatives { transcript: " that was the question" }
 # is_final: true }
 #
 # Notes:
 #
-#
+# * Only two of the above responses #4 and #7 contain final results; they are
 # indicated by +is_final: true+. Concatenating these together generates the
 # full transcript: "to be or not to be that is the question".
 #
-#
+# * The others contain interim +results+. #3 and #6 contain two interim
 # +results+: the first portion has a high stability and is less likely to
 # change; the second portion has a low stability and is very likely to
 # change. A UI designer might choose to show only high stability +results+.
 #
-#
+# * The specific +stability+ and +confidence+ values shown above are only for
 # illustrative purposes. Actual values may vary.
 #
-#
-#
-#
-#
+# * In each response, only one of these fields will be set:
+# +error+,
+# +speech_event_type+, or
+# one or more (repeated) +results+.
 # @!attribute [rw] error
 # @return [Google::Rpc::Status]
-# *Output-only* If set, returns a Google::Rpc::Status message that
+# *Output-only* If set, returns a {Google::Rpc::Status} message that
 # specifies the error for the operation.
 # @!attribute [rw] results
 # @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
@@ -351,6 +355,8 @@ module Google
 # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
 # *Output-only* May contain one or more recognition hypotheses (up to the
 # maximum specified in +max_alternatives+).
+# These alternatives are ordered in terms of accuracy, with the top (first)
+# alternative being the most probable, as ranked by the recognizer.
 class SpeechRecognitionResult; end

 # Alternative hypotheses (a.k.a. n-best list).
@@ -363,10 +369,36 @@ module Google
 # indicates an estimated greater likelihood that the recognized words are
 # correct. This field is typically provided only for the top hypothesis, and
 # only for +is_final=true+ results. Clients should not rely on the
-# +confidence+ field as it is not guaranteed to be accurate
-# any of the results.
+# +confidence+ field as it is not guaranteed to be accurate or consistent.
 # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
+# @!attribute [rw] words
+# @return [Array<Google::Cloud::Speech::V1::WordInfo>]
+# *Output-only* A list of word-specific information for each recognized word.
 class SpeechRecognitionAlternative; end
+
+# Word-specific information for recognized words. Word information is only
+# included in the response when certain request parameters are set, such
+# as +enable_word_time_offsets+.
+# @!attribute [rw] start_time
+# @return [Google::Protobuf::Duration]
+# *Output-only* Time offset relative to the beginning of the audio,
+# and corresponding to the start of the spoken word.
+# This field is only set if +enable_word_time_offsets=true+ and only
+# in the top hypothesis.
+# This is an experimental feature and the accuracy of the time offset can
+# vary.
+# @!attribute [rw] end_time
+# @return [Google::Protobuf::Duration]
+# *Output-only* Time offset relative to the beginning of the audio,
+# and corresponding to the end of the spoken word.
+# This field is only set if +enable_word_time_offsets=true+ and only
+# in the top hypothesis.
+# This is an experimental feature and the accuracy of the time offset can
+# vary.
+# @!attribute [rw] word
+# @return [String]
+# *Output-only* The word corresponding to this set of information.
+class WordInfo; end
 end
 end
 end
data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
CHANGED
@@ -77,7 +77,7 @@ module Google
 # If the embedded message type is well-known and has a custom JSON
 # representation, that representation will be embedded adding a field
 # +value+ which holds the custom JSON in addition to the +@type+
-# field. Example (for message Google::Protobuf::Duration):
+# field. Example (for message {Google::Protobuf::Duration}):
 #
 # {
 # "@type": "type.googleapis.com/google.protobuf.Duration",
@@ -96,7 +96,7 @@ module Google
 # qualified name of the type (as in +path/google.protobuf.Duration+).
 # The name should be in a canonical form (e.g., leading "." is
 # not accepted).
-# * An HTTP GET on the URL must yield a Google::Protobuf::Type
+# * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
 # value in binary format, or produce an error.
 # * Applications are allowed to cache lookup results based on the
 # URL, or have them precompiled into a binary to avoid any
data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
ADDED
@@ -0,0 +1,77 @@
+# Copyright 2017, Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module Google
+module Protobuf
+# A Duration represents a signed, fixed-length span of time represented
+# as a count of seconds and fractions of seconds at nanosecond
+# resolution. It is independent of any calendar and concepts like "day"
+# or "month". It is related to Timestamp in that the difference between
+# two Timestamp values is a Duration and it can be added or subtracted
+# from a Timestamp. Range is approximately +-10,000 years.
+#
+# Example 1: Compute Duration from two Timestamps in pseudo code.
+#
+# Timestamp start = ...;
+# Timestamp end = ...;
+# Duration duration = ...;
+#
+# duration.seconds = end.seconds - start.seconds;
+# duration.nanos = end.nanos - start.nanos;
+#
+# if (duration.seconds < 0 && duration.nanos > 0) {
+# duration.seconds += 1;
+# duration.nanos -= 1000000000;
+# } else if (durations.seconds > 0 && duration.nanos < 0) {
+# duration.seconds -= 1;
+# duration.nanos += 1000000000;
+# }
+#
+# Example 2: Compute Timestamp from Timestamp + Duration in pseudo code.
+#
+# Timestamp start = ...;
+# Duration duration = ...;
+# Timestamp end = ...;
+#
+# end.seconds = start.seconds + duration.seconds;
+# end.nanos = start.nanos + duration.nanos;
+#
+# if (end.nanos < 0) {
+# end.seconds -= 1;
+# end.nanos += 1000000000;
+# } else if (end.nanos >= 1000000000) {
+# end.seconds += 1;
+# end.nanos -= 1000000000;
+# }
+#
+# Example 3: Compute Duration from datetime.timedelta in Python.
+#
+# td = datetime.timedelta(days=3, minutes=10)
+# duration = Duration()
+# duration.FromTimedelta(td)
+# @!attribute [rw] seconds
+# @return [Integer]
+# Signed seconds of the span of time. Must be from -315,576,000,000
+# to +315,576,000,000 inclusive.
+# @!attribute [rw] nanos
+# @return [Integer]
+# Signed fractions of a second at nanosecond resolution of the span
+# of time. Durations less than one second are represented with a 0
+# +seconds+ field and a positive or negative +nanos+ field. For durations
+# of one second or more, a non-zero value for the +nanos+ field must be
+# of the same sign as the +seconds+ field. Must be from -999,999,999
+# to +999,999,999 inclusive.
+class Duration; end
+end
+end
data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb
CHANGED
@@ -16,16 +16,16 @@ module Google
 module Rpc
 # The +Status+ type defines a logical error model that is suitable for different
 # programming environments, including REST APIs and RPC APIs. It is used by
-#
+# [gRPC](https://github.com/grpc). The error model is designed to be:
 #
-#
-#
+# * Simple to use and understand for most users
+# * Flexible enough to meet unexpected needs
 #
 # = Overview
 #
 # The +Status+ message contains three pieces of data: error code, error message,
 # and error details. The error code should be an enum value of
-# Google::Rpc::Code, but it may accept additional error codes if needed. The
+# {Google::Rpc::Code}, but it may accept additional error codes if needed. The
 # error message should be a developer-facing English message that helps
 # developers *understand* and *resolve* the error. If a localized user-facing
 # error message is needed, put the localized message in the error details or
@@ -49,31 +49,31 @@ module Google
 #
 # Example uses of this error model include:
 #
-#
-#
-#
+# * Partial errors. If a service needs to return partial errors to the client,
+# it may embed the +Status+ in the normal response to indicate the partial
+# errors.
 #
-#
-#
+# * Workflow errors. A typical workflow has multiple steps. Each step may
+# have a +Status+ message for error reporting purpose.
 #
-#
-#
-#
+# * Batch operations. If a client uses batch request and batch response, the
+# +Status+ message should be used directly inside batch response, one for
+# each error sub-response.
 #
-#
-#
-#
+# * Asynchronous operations. If an API call embeds asynchronous operation
+# results in its response, the status of those operations should be
+# represented directly using the +Status+ message.
 #
-#
-#
+# * Logging. If some API errors are stored in logs, the message +Status+ could
+# be used directly after any stripping needed for security/privacy reasons.
 # @!attribute [rw] code
 # @return [Integer]
-# The status code, which should be an enum value of Google::Rpc::Code.
+# The status code, which should be an enum value of {Google::Rpc::Code}.
 # @!attribute [rw] message
 # @return [String]
 # A developer-facing error message, which should be in English. Any
 # user-facing error message should be localized and sent in the
-# Google::Rpc::Status#details field, or localized by the client.
+# {Google::Rpc::Status#details} field, or localized by the client.
 # @!attribute [rw] details
 # @return [Array<Google::Protobuf::Any>]
 # A list of messages that carry the error details. There will be a
data/lib/google/cloud/speech/v1/speech_client.rb
CHANGED
@@ -165,11 +165,15 @@ module Google
 # Performs synchronous speech recognition: receive results after all audio
 # has been sent and processed.
 #
-# @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+# @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
 # *Required* Provides information to the recognizer that specifies how to
 # process the request.
-#
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+# can also be provided.
+# @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
 # *Required* The audio data to be recognized.
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+# can also be provided.
 # @param options [Google::Gax::CallOptions]
 # Overrides the default settings for this call, e.g, timeout,
 # retries, etc.
@@ -178,32 +182,28 @@ module Google
 # @example
 # require "google/cloud/speech/v1"
 #
-#
-#
-# RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
-# SpeechClient = Google::Cloud::Speech::V1::SpeechClient
-#
-# speech_client = SpeechClient.new
-# encoding = AudioEncoding::FLAC
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# encoding = :FLAC
 # sample_rate_hertz = 44100
 # language_code = "en-US"
-# config =
-#
-#
-#
+# config = {
+# encoding: encoding,
+# sample_rate_hertz: sample_rate_hertz,
+# language_code: language_code
+# }
 # uri = "gs://bucket_name/file_name.flac"
-# audio =
-# audio.uri = uri
+# audio = { uri: uri }
 # response = speech_client.recognize(config, audio)

 def recognize \
 config,
 audio,
 options: nil
-req =
+req = {
 config: config,
 audio: audio
-}.delete_if { |_, v| v.nil? }
+}.delete_if { |_, v| v.nil? }
+req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::RecognizeRequest)
 @recognize.call(req, options)
 end

@@ -212,11 +212,15 @@ module Google
 # +Operation.error+ or an +Operation.response+ which contains
 # a +LongRunningRecognizeResponse+ message.
 #
-# @param config [Google::Cloud::Speech::V1::RecognitionConfig]
+# @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
 # *Required* Provides information to the recognizer that specifies how to
 # process the request.
-#
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionConfig`
+# can also be provided.
+# @param audio [Google::Cloud::Speech::V1::RecognitionAudio | Hash]
 # *Required* The audio data to be recognized.
+# A hash of the same form as `Google::Cloud::Speech::V1::RecognitionAudio`
+# can also be provided.
 # @param options [Google::Gax::CallOptions]
 # Overrides the default settings for this call, e.g, timeout,
 # retries, etc.
@@ -225,22 +229,17 @@ module Google
 # @example
 # require "google/cloud/speech/v1"
 #
-#
-#
-# RecognitionConfig = Google::Cloud::Speech::V1::RecognitionConfig
-# SpeechClient = Google::Cloud::Speech::V1::SpeechClient
-#
-# speech_client = SpeechClient.new
-# encoding = AudioEncoding::FLAC
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# encoding = :FLAC
 # sample_rate_hertz = 44100
 # language_code = "en-US"
-# config =
-#
-#
-#
+# config = {
+# encoding: encoding,
+# sample_rate_hertz: sample_rate_hertz,
+# language_code: language_code
+# }
 # uri = "gs://bucket_name/file_name.flac"
-# audio =
-# audio.uri = uri
+# audio = { uri: uri }
 #
 # # Register a callback during the method call.
 # operation = speech_client.long_running_recognize(config, audio) do |op|
@@ -273,10 +272,11 @@ module Google
 config,
 audio,
 options: nil
-req =
+req = {
 config: config,
 audio: audio
-}.delete_if { |_, v| v.nil? }
+}.delete_if { |_, v| v.nil? }
+req = Google::Gax::to_proto(req, Google::Cloud::Speech::V1::LongRunningRecognizeRequest)
 operation = Google::Gax::Operation.new(
 @long_running_recognize.call(req, options),
 @operations_client,
@@ -309,11 +309,8 @@ module Google
 # @example
 # require "google/cloud/speech/v1"
 #
-#
-#
-#
-# speech_client = SpeechClient.new
-# request = StreamingRecognizeRequest.new
+# speech_client = Google::Cloud::Speech::V1::SpeechClient.new
+# request = {}
 # requests = [request]
 # speech_client.streaming_recognize(requests).each do |element|
 # # Process element.
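At the generated-client layer the updated examples pass plain hashes, which Google::Gax::to_proto now coerces into the request messages, so the new enable_word_time_offsets field can be requested the same way. A hedged sketch (bucket and object names are placeholders):

    require "google/cloud/speech/v1"

    speech_client = Google::Cloud::Speech::V1::SpeechClient.new
    config = {
      encoding:                 :FLAC,
      sample_rate_hertz:        44100,
      language_code:            "en-US",
      enable_word_time_offsets: true # ask for per-word start/end offsets
    }
    audio = { uri: "gs://bucket_name/file_name.flac" }

    response = speech_client.recognize(config, audio)
    response.results.each do |result|
      result.alternatives.first.words.each do |word_info|
        puts "#{word_info.word} starts at #{word_info.start_time.seconds}s"
      end
    end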
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: google-cloud-speech
 version: !ruby/object:Gem::Version
-version: 0.
+version: 0.26.0
 platform: ruby
 authors:
 - Mike Moore
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-07-
+date: 2017-07-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: google-cloud-core
@@ -179,6 +179,7 @@ files:
 - lib/google-cloud-speech.rb
 - lib/google/cloud/speech.rb
 - lib/google/cloud/speech/audio.rb
+- lib/google/cloud/speech/convert.rb
 - lib/google/cloud/speech/credentials.rb
 - lib/google/cloud/speech/operation.rb
 - lib/google/cloud/speech/project.rb
@@ -190,6 +191,7 @@ files:
 - lib/google/cloud/speech/v1/cloud_speech_services_pb.rb
 - lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb
 - lib/google/cloud/speech/v1/doc/google/protobuf/any.rb
+- lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb
 - lib/google/cloud/speech/v1/doc/google/rpc/status.rb
 - lib/google/cloud/speech/v1/speech_client.rb
 - lib/google/cloud/speech/v1/speech_client_config.json