RubyGems - google-cloud-speech-v1 - Versions diffs - 0.11.0 → 0.12.0 - Mend

google-cloud-speech-v1 0.11.0 → 0.12.0

Files changed (6) hide show

checksums.yaml +4 -4
data/README.md +1 -1
data/lib/google/cloud/speech/v1/cloud_speech_pb.rb +11 -0
data/lib/google/cloud/speech/v1/version.rb +1 -1
data/proto_docs/google/cloud/speech/v1/cloud_speech.rb +60 -15
metadata +5 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3b8d30c86908298f03ad30a9bac371535967be78d2d8876192b4e95a24c60b3a
-  data.tar.gz: 390bb4dedb2679807486df2974f614e66e8ffb1b82d11e7151cd78fd814533a7
+  metadata.gz: 5c3b3e6ee54f4e2bba948193c467882bbf4e47dd69ceac3bbe160cf204db2a9a
+  data.tar.gz: 55978252b10819ed6cfc5508227fecfa57ba4f761af83c4f4ec54cf12832451c
 SHA512:
-  metadata.gz: bd36ba2e48c2ad07491faa766d9dc4ffc71db8f15f903044feb13bcb137906d7322ab0c10da91c041c0c73d54508a2f80b4942c4df06d8f93889733106560bcd
-  data.tar.gz: 34cf9fa2aa0549dccc6337151299fb93bdff1ea6069972af69abca72ac2d1002209a967405fddb85ea30ed8dcf852baefed22e83ecfc698c0882e3ec1e5b935b
+  metadata.gz: b64bf3e6385c2f25add2efbfd16be36e7eaaa8f4c4d2078ad3022f9982d6574ac7e6387272c234033980524e8ff6b103ea8db9cf5280b6188105b9f69e5140c3
+  data.tar.gz: 7d0c1743806c0aa83f873773dae32393d63a921b7a90a0beeeab241bb3ec60a7b5350d154b3cebe8e67af125b219b4519bd6fdefacd4b7fa4aba368d90dbd102

data/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Ruby Client for the Cloud Speech-to-Text V1 API
-API Client library for the Cloud Speech-to-Text V1 API
+Converts audio to text by applying powerful neural network models.
 Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.

data/lib/google/cloud/speech/v1/cloud_speech_pb.rb CHANGED Viewed

@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
       optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
       optional :single_utterance, :bool, 2
       optional :interim_results, :bool, 3
+      optional :enable_voice_activity_events, :bool, 5
+      optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout"
+    end
+    add_message "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout" do
+      optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
+      optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
     end
     add_message "google.cloud.speech.v1.RecognitionConfig" do
       optional :encoding, :enum, 1, "google.cloud.speech.v1.RecognitionConfig.AudioEncoding"
@@ -153,6 +159,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
       optional :error, :message, 1, "google.rpc.Status"
       repeated :results, :message, 2, "google.cloud.speech.v1.StreamingRecognitionResult"
       optional :speech_event_type, :enum, 4, "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType"
+      optional :speech_event_time, :message, 8, "google.protobuf.Duration"
       optional :total_billed_time, :message, 5, "google.protobuf.Duration"
       optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1.SpeechAdaptationInfo"
       optional :request_id, :int64, 10
@@ -160,6 +167,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
     add_enum "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType" do
       value :SPEECH_EVENT_UNSPECIFIED, 0
       value :END_OF_SINGLE_UTTERANCE, 1
+      value :SPEECH_ACTIVITY_BEGIN, 2
+      value :SPEECH_ACTIVITY_END, 3
+      value :SPEECH_ACTIVITY_TIMEOUT, 4
     end
     add_message "google.cloud.speech.v1.StreamingRecognitionResult" do
       repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
@@ -203,6 +213,7 @@ module Google
         TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.TranscriptOutputConfig").msgclass
         StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeRequest").msgclass
         StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
+        StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
         RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
         RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
         SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeakerDiarizationConfig").msgclass

data/lib/google/cloud/speech/v1/version.rb CHANGED Viewed

@@ -21,7 +21,7 @@ module Google
   module Cloud
     module Speech
       module V1
-        VERSION = "0.11.0"
+        VERSION = "0.12.0"
       end
     end
   end

data/proto_docs/google/cloud/speech/v1/cloud_speech.rb CHANGED Viewed

@@ -122,9 +122,30 @@ module Google
         #     returned as they become available (these interim results are indicated with
         #     the `is_final=false` flag).
         #     If `false` or omitted, only `is_final=true` result(s) are returned.
+        # @!attribute [rw] enable_voice_activity_events
+        #   @return [::Boolean]
+        #     If `true`, responses with voice activity speech events will be returned as
+        #     they are detected.
+        # @!attribute [rw] voice_activity_timeout
+        #   @return [::Google::Cloud::Speech::V1::StreamingRecognitionConfig::VoiceActivityTimeout]
+        #     If set, the server will automatically close the stream after the specified
+        #     duration has elapsed after the last VOICE_ACTIVITY speech event has been
+        #     sent. The field `enable_voice_activity_events` must also be set to true.
         class StreamingRecognitionConfig
           include ::Google::Protobuf::MessageExts
           extend ::Google::Protobuf::MessageExts::ClassMethods
+          # Events that a timeout can be set on for voice activity.
+          # @!attribute [rw] speech_start_timeout
+          #   @return [::Google::Protobuf::Duration]
+          #     Duration to timeout the stream if no speech begins.
+          # @!attribute [rw] speech_end_timeout
+          #   @return [::Google::Protobuf::Duration]
+          #     Duration to timeout the stream after speech ends.
+          class VoiceActivityTimeout
+            include ::Google::Protobuf::MessageExts
+            extend ::Google::Protobuf::MessageExts::ClassMethods
+          end
         end
         # Provides information to the recognizer that specifies how to process the
@@ -133,7 +154,8 @@ module Google
         #   @return [::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding]
         #     Encoding of audio data sent in all `RecognitionAudio` messages.
         #     This field is optional for `FLAC` and `WAV` audio files and required
-        #     for all other audio formats. For details, see {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
+        #     for all other audio formats. For details, see
+        #     {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
         # @!attribute [rw] sample_rate_hertz
         #   @return [::Integer]
         #     Sample rate in Hertz of the audio data sent in all
@@ -142,7 +164,8 @@ module Google
         #     source to 16000 Hz. If that's not possible, use the native sample rate of
         #     the audio source (instead of re-sampling).
         #     This field is optional for FLAC and WAV audio files, but is
-        #     required for all other audio formats. For details, see {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
+        #     required for all other audio formats. For details, see
+        #     {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
         # @!attribute [rw] audio_channel_count
         #   @return [::Integer]
         #     The number of channels in the input audio data.
@@ -346,7 +369,8 @@ module Google
           # an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
           # encoding configuration must match the encoding described in the audio
           # header; otherwise the request returns an
-          # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
+          # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
+          # code.
           module AudioEncoding
             # Not specified.
             ENCODING_UNSPECIFIED = 0
@@ -585,8 +609,8 @@ module Google
         # Contains audio data in the encoding specified in the `RecognitionConfig`.
         # Either `content` or `uri` must be supplied. Supplying both or neither
-        # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
-        # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
+        # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
+        # See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
         # @!attribute [rw] content
         #   @return [::String]
         #     The audio data bytes encoded as specified in
@@ -599,8 +623,9 @@ module Google
         #     Currently, only Google Cloud Storage URIs are
         #     supported, which must be specified in the following format:
         #     `gs://bucket_name/object_name` (other URI formats return
-        #     [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-        #     [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
+        #     [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
+        #     For more information, see [Request
+        #     URIs](https://cloud.google.com/storage/docs/reference-uris).
         class RecognitionAudio
           include ::Google::Protobuf::MessageExts
           extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -673,8 +698,8 @@ module Google
         #     Time of the most recent processing update.
         # @!attribute [r] uri
         #   @return [::String]
-        #     Output only. The URI of the audio file being transcribed. Empty if the audio was sent
-        #     as byte content.
+        #     Output only. The URI of the audio file being transcribed. Empty if the
+        #     audio was sent as byte content.
         class LongRunningRecognizeMetadata
           include ::Google::Protobuf::MessageExts
           extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -742,6 +767,9 @@ module Google
         # @!attribute [rw] speech_event_type
         #   @return [::Google::Cloud::Speech::V1::StreamingRecognizeResponse::SpeechEventType]
         #     Indicates the type of speech event.
+        # @!attribute [rw] speech_event_time
+        #   @return [::Google::Protobuf::Duration]
+        #     Time offset between the beginning of the audio and event emission.
         # @!attribute [rw] total_billed_time
         #   @return [::Google::Protobuf::Duration]
         #     When available, billed audio seconds for the stream.
@@ -770,6 +798,23 @@ module Google
             # until the server closes the gRPC connection. This event is only sent if
             # `single_utterance` was set to `true`, and is not used otherwise.
             END_OF_SINGLE_UTTERANCE = 1
+            # This event indicates that the server has detected the beginning of human
+            # voice activity in the stream. This event can be returned multiple times
+            # if speech starts and stops repeatedly throughout the stream. This event
+            # is only sent if `enable_voice_activity_events` is set to true.
+            SPEECH_ACTIVITY_BEGIN = 2
+            # This event indicates that the server has detected the end of human voice
+            # activity in the stream. This event can be returned multiple times if
+            # speech starts and stops repeatedly throughout the stream. This event is
+            # only sent if `enable_voice_activity_events` is set to true.
+            SPEECH_ACTIVITY_END = 3
+            # This event indicates that the user-set timeout for speech activity begin
+            # or end has been exceeded. Upon receiving this event, the client is
+            # expected to send a half close. Further audio will not be processed.
+            SPEECH_ACTIVITY_TIMEOUT = 4
           end
         end
@@ -806,9 +851,9 @@ module Google
         #     For audio_channel_count = N, its output values can range from '1' to 'N'.
         # @!attribute [r] language_code
         #   @return [::String]
-        #     Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
-        #     of the language in this result. This language code was detected to have
-        #     the most likelihood of being spoken in the audio.
+        #     Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+        #     language tag of the language in this result. This language code was
+        #     detected to have the most likelihood of being spoken in the audio.
         class StreamingRecognitionResult
           include ::Google::Protobuf::MessageExts
           extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -832,9 +877,9 @@ module Google
         #     beginning of the audio.
         # @!attribute [r] language_code
         #   @return [::String]
-        #     Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
-        #     of the language in this result. This language code was detected to have
-        #     the most likelihood of being spoken in the audio.
+        #     Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+        #     language tag of the language in this result. This language code was
+        #     detected to have the most likelihood of being spoken in the audio.
         class SpeechRecognitionResult
           include ::Google::Protobuf::MessageExts
           extend ::Google::Protobuf::MessageExts::ClassMethods

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: google-cloud-speech-v1
 version: !ruby/object:Gem::Version
-  version: 0.11.0
+  version: 0.12.0
 platform: ruby
 authors:
 - Google LLC
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-02-23 00:00:00.000000000 Z
+date: 2023-02-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.17.1
+        version: 0.18.0
     - - "<"
       - !ruby/object:Gem::Version
         version: 2.a
@@ -26,7 +26,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.17.1
+        version: 0.18.0
     - - "<"
       - !ruby/object:Gem::Version
         version: 2.a
@@ -236,5 +236,5 @@ requirements: []
 rubygems_version: 3.4.2
 signing_key:
 specification_version: 4
-summary: API Client library for the Cloud Speech-to-Text V1 API
+summary: Converts audio to text by applying powerful neural network models.
 test_files: []