google-cloud-speech-v2 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -275,22 +275,44 @@ module Google
  # When using this model, the service will stop transcribing audio after the
  # first utterance is detected and completed.
  #
- # When using this model,
- # {::Google::Cloud::Speech::V2::RecognitionFeatures::MultiChannelMode::SEPARATE_RECOGNITION_PER_CHANNEL SEPARATE_RECOGNITION_PER_CHANNEL}
- # is not supported; multi-channel audio is accepted, but only the first
- # channel will be processed and transcribed.
+ # When using this model,
+ # {::Google::Cloud::Speech::V2::RecognitionFeatures::MultiChannelMode::SEPARATE_RECOGNITION_PER_CHANNEL SEPARATE_RECOGNITION_PER_CHANNEL}
+ # is not supported; multi-channel audio is accepted, but only the first
+ # channel will be processed and transcribed.
+ #
+ # - `telephony`
+ #
+ # Best for audio that originated from a phone call (typically recorded at
+ # an 8khz sampling rate).
+ #
+ # - `medical_conversation`
+ #
+ # For conversations between a medical provider—for example, a doctor or
+ # nurse—and a patient. Use this model when both a provider and a patient
+ # are speaking. Words uttered by each speaker are automatically detected
+ # and labeled in the returned transcript.
+ #
+ # For supported features please see [medical models
+ # documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
+ #
+ # - `medical_dictation`
+ #
+ # For dictated notes spoken by a single medical provider—for example, a
+ # doctor dictating notes about a patient's blood test results.
+ #
+ # For supported features please see [medical models
+ # documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
+ #
+ # - `usm`
+ #
+ # The next generation of Speech-to-Text models from Google.
  # @!attribute [rw] language_codes
  # @return [::Array<::String>]
  # Required. The language of the supplied audio as a
  # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  #
- # Supported languages:
- #
- # - `en-US`
- #
- # - `en-GB`
- #
- # - `fr-FR`
+ # Supported languages for each model are listed at:
+ # https://cloud.google.com/speech-to-text/docs/languages
  #
  # If additional languages are provided, recognition result will contain
  # recognition in the most likely language detected. The recognition result
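
The model list and language guidance added above belong to the Recognizer resource. A minimal sketch of creating a recognizer that uses the `telephony` model and then transcribing a clip with it; the project ID, recognizer ID, and audio file are placeholders, and the exact field set may vary by gem version:

```ruby
require "google/cloud/speech/v2"

speech = Google::Cloud::Speech::V2::Speech::Client.new

# create_recognizer returns a long-running operation that resolves to the Recognizer.
operation = speech.create_recognizer(
  parent:        "projects/my-project/locations/global",  # placeholder project
  recognizer_id: "telephony-recognizer",                   # placeholder ID
  recognizer:    {
    model:          "telephony",   # one of the models listed above
    language_codes: ["en-US"]      # see the supported-languages page above
  }
)
operation.wait_until_done!
recognizer = operation.response

# Transcribe a short phone-call recording with the new recognizer.
response = speech.recognize(
  recognizer: recognizer.name,
  config:     { auto_decoding_config: {} },  # let the service detect the encoding
  content:    File.binread("call.wav")       # placeholder audio file
)
response.results.each { |result| puts result.alternatives.first&.transcript }
```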
@@ -373,14 +395,23 @@ module Google

  # Automatically detected decoding parameters.
  # Supported for the following encodings:
+ #
  # * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
+ #
  # * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
+ #
  # * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
+ #
  # * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
+ #
  # * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
+ #
  # * FLAC: FLAC frames in the "native FLAC" container format.
+ #
  # * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
+ #
  # * OGG_OPUS: Opus audio frames in an Ogg container.
+ #
  # * WEBM_OPUS: Opus audio frames in a WebM container.
  class AutoDetectDecodingConfig
  include ::Google::Protobuf::MessageExts
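
Since `AutoDetectDecodingConfig` carries no fields, enabling auto-detection is just a matter of populating that branch of the decoding oneof with an empty message. Continuing the sketch above (the file name is a placeholder):

```ruby
# Any of the containers listed above (WAV, FLAC, MP3, Ogg/WebM Opus, AMR) can be
# passed as-is; the service detects the encoding from the file itself.
config = Google::Cloud::Speech::V2::RecognitionConfig.new(
  auto_decoding_config: Google::Cloud::Speech::V2::AutoDetectDecodingConfig.new
)
response = speech.recognize recognizer: recognizer.name,
                            config:     config,
                            content:    File.binread("podcast.mp3")  # placeholder
```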
@@ -398,16 +429,24 @@ module Google
  # sampling rate of the audio source to 16000 Hz. If that's not possible, use
  # the native sample rate of the audio source (instead of re-sampling).
  # Supported for the following encodings:
+ #
  # * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
+ #
  # * MULAW: Headerless 8-bit companded mulaw samples.
+ #
  # * ALAW: Headerless 8-bit companded alaw samples.
  # @!attribute [rw] audio_channel_count
  # @return [::Integer]
  # Number of channels present in the audio data sent for recognition.
  # Supported for the following encodings:
+ #
  # * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
+ #
  # * MULAW: Headerless 8-bit companded mulaw samples.
+ #
  # * ALAW: Headerless 8-bit companded alaw samples.
+ #
+ # The maximum allowed value is 8.
  class ExplicitDecodingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
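
Headerless audio needs `ExplicitDecodingConfig` instead, with the encoding, sample rate, and channel count spelled out. A hedged sketch for single-channel 16 kHz PCM (note the 8-channel ceiling on `audio_channel_count` added above):

```ruby
config = Google::Cloud::Speech::V2::RecognitionConfig.new(
  explicit_decoding_config: {
    encoding:            :LINEAR16,  # headerless 16-bit little-endian PCM
    sample_rate_hertz:   16_000,     # 16000 Hz is the recommended rate
    audio_channel_count: 1           # maximum allowed value is 8
  }
)
```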
@@ -433,7 +472,7 @@ module Google
  # @return [::Integer]
  # Required. Minimum number of speakers in the conversation. This range gives
  # you more flexibility by allowing the system to automatically determine the
- # correct number of speakers. If not set, the default value is 2.
+ # correct number of speakers.
  #
  # To fix the number of speakers detected in the audio, set
  # `min_speaker_count` = `max_speaker_count`.
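
Diarization is requested through `RecognitionFeatures`; as the comment notes, setting `min_speaker_count` equal to `max_speaker_count` pins the speaker count instead of letting the service choose within a range. A sketch:

```ruby
features = Google::Cloud::Speech::V2::RecognitionFeatures.new(
  diarization_config: {
    min_speaker_count: 2,  # set min == max to fix the number of speakers
    max_speaker_count: 2
  }
)
config = { auto_decoding_config: {}, features: features }
```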
@@ -521,28 +560,28 @@ module Google
  end

  # Provides "hints" to the speech recognizer to favor specific words and phrases
- # in the results. Phrase sets can be specified as an inline resource, or a
- # reference to an existing phrase set resource.
+ # in the results. PhraseSets can be specified as an inline resource, or a
+ # reference to an existing PhraseSet resource.
  # @!attribute [rw] phrase_sets
  # @return [::Array<::Google::Cloud::Speech::V2::SpeechAdaptation::AdaptationPhraseSet>]
- # A list of inline or referenced phrase sets.
+ # A list of inline or referenced PhraseSets.
  # @!attribute [rw] custom_classes
  # @return [::Array<::Google::Cloud::Speech::V2::CustomClass>]
- # A list of inline custom classes. Existing custom class resources can be
- # referenced directly in a phrase set.
+ # A list of inline CustomClasses. Existing CustomClass resources can be
+ # referenced directly in a PhraseSet.
  class SpeechAdaptation
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

- # A biasing phrase set, which can be either a string referencing the name of
- # an existing phrase set resource, or an inline definition of a phrase set.
+ # A biasing PhraseSet, which can be either a string referencing the name of
+ # an existing PhraseSets resource, or an inline definition of a PhraseSet.
  # @!attribute [rw] phrase_set
  # @return [::String]
- # The name of an existing phrase set resource. The user must have read
+ # The name of an existing PhraseSet resource. The user must have read
  # access to the resource and it must not be deleted.
  # @!attribute [rw] inline_phrase_set
  # @return [::Google::Cloud::Speech::V2::PhraseSet]
- # An inline defined phrase set.
+ # An inline defined PhraseSet.
  class AdaptationPhraseSet
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
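
An `AdaptationPhraseSet` either names an existing PhraseSet resource or carries one inline, and both forms can be mixed in the same request. A hedged sketch (the resource name and phrase values are placeholders):

```ruby
adaptation = Google::Cloud::Speech::V2::SpeechAdaptation.new(
  phrase_sets: [
    # Reference an existing PhraseSet resource by name (placeholder name).
    { phrase_set: "projects/my-project/locations/global/phraseSets/agent-terms" },
    # Or define one inline for this request only.
    { inline_phrase_set: { phrases: [{ value: "Speech-to-Text", boost: 10.0 }] } }
  ]
)
config = { auto_decoding_config: {}, adaptation: adaptation }
```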
@@ -651,9 +690,9 @@ module Google
  # @!attribute [rw] words
  # @return [::Array<::Google::Cloud::Speech::V2::WordInfo>]
  # A list of word-specific information for each recognized word.
- # When
- # [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
- # is true, you will see all the words from the beginning of the audio.
+ # When the
+ # {::Google::Cloud::Speech::V2::SpeakerDiarizationConfig SpeakerDiarizationConfig}
+ # is set, you will see all the words from the beginning of the audio.
  class SpeechRecognitionAlternative
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -694,8 +733,8 @@ module Google
  # A distinct label is assigned for every speaker within the audio. This field
  # specifies which one of those speakers was detected to have spoken this
  # word. `speaker_label` is set if
- # [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
- # is `true` and only in the top alternative.
+ # {::Google::Cloud::Speech::V2::SpeakerDiarizationConfig SpeakerDiarizationConfig}
+ # is given and only in the top alternative.
  class WordInfo
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
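
When a `SpeakerDiarizationConfig` was supplied (see the diarization sketch earlier), the per-word `speaker_label` values appear only on the top alternative. A sketch of reading them from a recognize response:

```ruby
response.results.each do |result|
  top = result.alternatives.first  # speaker labels appear only in the top alternative
  next unless top
  top.words.each do |word|
    # speaker_label is populated only when diarization was configured.
    puts format("%-10s %s", word.speaker_label, word.word)
  end
end
```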
@@ -795,9 +834,9 @@ module Google
  # of the recognizer during this recognition request. If no mask is provided,
  # all non-default valued fields in
  # {::Google::Cloud::Speech::V2::StreamingRecognitionConfig#config config} override
- # the values in the recognizer for this recognition request. If a mask is
+ # the values in the Recognizer for this recognition request. If a mask is
  # provided, only the fields listed in the mask override the config in the
- # recognizer for this recognition request. If a wildcard (`*`) is provided,
+ # Recognizer for this recognition request. If a wildcard (`*`) is provided,
  # {::Google::Cloud::Speech::V2::StreamingRecognitionConfig#config config}
  # completely overrides and replaces the config in the recognizer for this
  # recognition request.
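
The mask semantics above (no mask: non-default fields override; explicit mask: only the listed fields override; `*`: full replacement) apply per streaming request. A hedged sketch of a streaming config that overrides a single field of the stored Recognizer config:

```ruby
streaming_config = {
  config:      { features: { enable_word_time_offsets: true } },
  # Only the listed path overrides the Recognizer's stored config;
  # use paths: ["*"] to replace the stored config entirely.
  config_mask: { paths: ["features.enable_word_time_offsets"] }
}
```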
@@ -841,6 +880,7 @@ module Google
  # @!attribute [rw] audio
  # @return [::String]
  # Inline audio bytes to be Recognized.
+ # Maximum size for this field is 15 KB per request.
  class StreamingRecognizeRequest
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
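
Because each streaming request may now carry at most 15 KB of audio, the payload has to be sent in chunks after an initial configuration message. A hedged sketch using `Gapic::StreamInput` (the file name is a placeholder; `streaming_config` comes from the sketch above):

```ruby
input  = Gapic::StreamInput.new
stream = speech.streaming_recognize input

# The first request carries the configuration; later ones carry audio only.
input << { recognizer: recognizer.name, streaming_config: streaming_config }

File.open("call.raw", "rb") do |file|
  while (chunk = file.read(15_000))  # keep each chunk under the 15 KB per-request cap
    input << { audio: chunk }
  end
end
input.close

stream.each do |message|
  message.results.each { |result| puts result.alternatives.first&.transcript }
end
```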
@@ -878,17 +918,57 @@ module Google
  # @!attribute [rw] files
  # @return [::Array<::Google::Cloud::Speech::V2::BatchRecognizeFileMetadata>]
  # Audio files with file metadata for ASR.
+ # The maximum number of files allowed to be specified is 5.
+ # @!attribute [rw] recognition_output_config
+ # @return [::Google::Cloud::Speech::V2::RecognitionOutputConfig]
+ # Configuration options for where to output the transcripts of each file.
  class BatchRecognizeRequest
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
  end

+ # Output configurations for Cloud Storage.
+ # @!attribute [rw] uri
+ # @return [::String]
+ # The Cloud Storage URI prefix with which recognition results will be
+ # written.
+ class GcsOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # Output configurations for inline response.
+ class InlineOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # Configuration options for the output(s) of recognition.
+ # @!attribute [rw] gcs_output_config
+ # @return [::Google::Cloud::Speech::V2::GcsOutputConfig]
+ # If this message is populated, recognition results are written to the
+ # provided Google Cloud Storage URI.
+ # @!attribute [rw] inline_response_config
+ # @return [::Google::Cloud::Speech::V2::InlineOutputConfig]
+ # If this message is populated, recognition results are provided in the
+ # {::Google::Cloud::Speech::V2::BatchRecognizeResponse BatchRecognizeResponse}
+ # message of the Operation when completed. This is only supported when
+ # calling {::Google::Cloud::Speech::V2::Speech::Client#batch_recognize BatchRecognize}
+ # with just one audio file.
+ class RecognitionOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
  # Response message for
  # {::Google::Cloud::Speech::V2::Speech::Client#batch_recognize BatchRecognize} that is
  # packaged into a longrunning {::Google::Longrunning::Operation Operation}.
  # @!attribute [rw] results
  # @return [::Google::Protobuf::Map{::String => ::Google::Cloud::Speech::V2::BatchRecognizeFileResult}]
  # Map from filename to the final result for that file.
+ # @!attribute [rw] total_billed_duration
+ # @return [::Google::Protobuf::Duration]
+ # When available, billed audio seconds for the corresponding request.
  class BatchRecognizeResponse
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
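
The two output modes added above are the mutually exclusive branches of `RecognitionOutputConfig`: write transcripts to a Cloud Storage prefix (up to five input files), or return them inline in the operation response (single file only). A hedged sketch of the Cloud Storage variant (bucket and object URIs are placeholders):

```ruby
operation = speech.batch_recognize(
  recognizer: recognizer.name,
  config:     { auto_decoding_config: {} },
  files:      [{ uri: "gs://my-bucket/interview.flac" }],  # at most 5 files
  recognition_output_config: {
    gcs_output_config: { uri: "gs://my-bucket/transcripts/" }  # output prefix
    # For a single file, inline_response_config: {} returns results in the
    # BatchRecognizeResponse instead of writing to Cloud Storage.
  }
)
operation.wait_until_done!
batch_response = operation.response  # BatchRecognizeResponse
puts batch_response.total_billed_duration&.seconds
```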
@@ -903,13 +983,36 @@ module Google
  end
  end

+ # Output type for Cloud Storage of BatchRecognize transcripts. Though this
+ # proto isn't returned in this API anywhere, the Cloud Storage transcripts will
+ # be this proto serialized and should be parsed as such.
+ # @!attribute [rw] results
+ # @return [::Array<::Google::Cloud::Speech::V2::SpeechRecognitionResult>]
+ # Sequential list of transcription results corresponding to sequential
+ # portions of audio.
+ # @!attribute [rw] metadata
+ # @return [::Google::Cloud::Speech::V2::RecognitionResponseMetadata]
+ # Metadata about the recognition.
+ class BatchRecognizeResults
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
  # Final results for a single file.
  # @!attribute [rw] uri
  # @return [::String]
- # The GCS URI to which recognition results were written.
+ # The Cloud Storage URI to which recognition results were written.
  # @!attribute [rw] error
  # @return [::Google::Rpc::Status]
  # Error if one was encountered.
+ # @!attribute [rw] metadata
+ # @return [::Google::Cloud::Speech::V2::RecognitionResponseMetadata]
+ # @!attribute [rw] transcript
+ # @return [::Google::Cloud::Speech::V2::BatchRecognizeResults]
+ # The transcript for the audio file. This is populated only when
+ # {::Google::Cloud::Speech::V2::InlineOutputConfig InlineOutputConfig} is set in
+ # the
+ # [RecognitionOutputConfig][[google.cloud.speech.v2.RecognitionOutputConfig].
  class BatchRecognizeFileResult
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
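
Reading the results depends on the output mode chosen above: with inline output the `transcript` field of each file result is already a `BatchRecognizeResults` message, while with Cloud Storage output the written object is that same message serialized and can be decoded with the generated protobuf class. A hedged sketch, assuming the stored object was downloaded locally and is JSON-encoded (use `decode` instead for a binary payload):

```ruby
# Inline output: transcripts arrive on each BatchRecognizeFileResult.
batch_response.results.each do |filename, file_result|
  if file_result.error && file_result.error.code != 0
    warn "#{filename} failed: #{file_result.error.message}"
    next
  end
  file_result.transcript&.results&.each do |result|
    puts "#{filename}: #{result.alternatives.first&.transcript}"
  end
end

# Cloud Storage output: parse the written object as BatchRecognizeResults.
json    = File.read("interview_transcript.json")  # placeholder downloaded object
decoded = Google::Cloud::Speech::V2::BatchRecognizeResults.decode_json json
decoded.results.each { |result| puts result.alternatives.first&.transcript }
```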
@@ -925,7 +1028,7 @@ module Google
  # Error if one was encountered.
  # @!attribute [rw] uri
  # @return [::String]
- # The GCS URI to which recognition results will be written.
+ # The Cloud Storage URI to which recognition results will be written.
  class BatchRecognizeTranscriptionMetadata
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -1290,7 +1393,8 @@ module Google
  # phrase will be recognized over other similar sounding phrases. The higher
  # the boost, the higher the chance of false positive recognition as well.
  # Valid `boost` values are between 0 (exclusive) and 20. We recommend using a
- # binary search approach to finding the optimal value for your use case.
+ # binary search approach to finding the optimal value for your use case as
+ # well as adding phrases both with and without boost to your requests.
  # @!attribute [rw] display_name
  # @return [::String]
  # User-settable, human-readable name for the PhraseSet. Must be 63
  # be recognized over other similar sounding phrases. The higher the boost,
  # the higher the chance of false positive recognition as well. Negative
  # boost values would correspond to anti-biasing. Anti-biasing is not
- # enabled, so negative boost will simply be ignored. Though `boost` can
- # accept a wide range of positive values, most use cases are best served
- # with values between 0 and 20. We recommend using a binary search approach
- # to finding the optimal value for your use case. Speech recognition
- # will skip PhraseSets with a boost value of 0.
+ # enabled, so negative boost values will return an error. Boost values must
+ # be between 0 and 20. Any values outside that range will return an error.
+ # We recommend using a binary search approach to finding the optimal value
+ # for your use case as well as adding phrases both with and without boost
+ # to your requests.
  class Phrase
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
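
The tightened boost rules above (values must stay between 0 and 20; anything outside that range is rejected) and the advice to mix boosted and un-boosted phrases can be applied to an inline PhraseSet like this (the phrase values are placeholders):

```ruby
inline_phrase_set = {
  phrases: [
    { value: "Cymbal Labs",         boost: 15.0 },  # keep boost between 0 and 20
    { value: "quarterly earnings" }                  # no boost: plain hint only
  ]
}
config = {
  auto_decoding_config: {},
  adaptation: { phrase_sets: [{ inline_phrase_set: inline_phrase_set }] }
}
```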
@@ -28,12 +28,14 @@ module Google
  # [API Design Guide](https://cloud.google.com/apis/design/errors).
  # @!attribute [rw] code
  # @return [::Integer]
- # The status code, which should be an enum value of [google.rpc.Code][google.rpc.Code].
+ # The status code, which should be an enum value of
+ # [google.rpc.Code][google.rpc.Code].
  # @!attribute [rw] message
  # @return [::String]
  # A developer-facing error message, which should be in English. Any
  # user-facing error message should be localized and sent in the
- # {::Google::Rpc::Status#details google.rpc.Status.details} field, or localized by the client.
+ # {::Google::Rpc::Status#details google.rpc.Status.details} field, or localized
+ # by the client.
  # @!attribute [rw] details
  # @return [::Array<::Google::Protobuf::Any>]
  # A list of messages that carry the error details. There is a common set of
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-speech-v2
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.3.0
  platform: ruby
  authors:
  - Google LLC
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-10-03 00:00:00.000000000 Z
+ date: 2023-03-23 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.12'
+ version: 0.18.0
  - - "<"
  - !ruby/object:Gem::Version
  version: 2.a
@@ -26,7 +26,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.12'
+ version: 0.18.0
  - - "<"
  - !ruby/object:Gem::Version
  version: 2.a
@@ -44,20 +44,40 @@ dependencies:
  - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
+ - !ruby/object:Gem::Dependency
+ name: google-cloud-location
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0.4'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 2.a
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0.4'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 2.a
  - !ruby/object:Gem::Dependency
  name: google-style
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.26.1
+ version: 1.26.3
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.26.1
+ version: 1.26.3
  - !ruby/object:Gem::Dependency
  name: minitest
  requirement: !ruby/object:Gem::Requirement
@@ -175,16 +195,24 @@ files:
  - README.md
  - lib/google-cloud-speech-v2.rb
  - lib/google/cloud/speech/v2.rb
+ - lib/google/cloud/speech/v2/bindings_override.rb
  - lib/google/cloud/speech/v2/cloud_speech_pb.rb
  - lib/google/cloud/speech/v2/cloud_speech_services_pb.rb
+ - lib/google/cloud/speech/v2/rest.rb
  - lib/google/cloud/speech/v2/speech.rb
  - lib/google/cloud/speech/v2/speech/client.rb
  - lib/google/cloud/speech/v2/speech/credentials.rb
  - lib/google/cloud/speech/v2/speech/operations.rb
  - lib/google/cloud/speech/v2/speech/paths.rb
+ - lib/google/cloud/speech/v2/speech/rest.rb
+ - lib/google/cloud/speech/v2/speech/rest/client.rb
+ - lib/google/cloud/speech/v2/speech/rest/operations.rb
+ - lib/google/cloud/speech/v2/speech/rest/service_stub.rb
  - lib/google/cloud/speech/v2/version.rb
  - proto_docs/README.md
+ - proto_docs/google/api/client.rb
  - proto_docs/google/api/field_behavior.rb
+ - proto_docs/google/api/launch_stage.rb
  - proto_docs/google/api/resource.rb
  - proto_docs/google/cloud/speech/v2/cloud_speech.rb
  - proto_docs/google/longrunning/operations.rb
@@ -213,8 +241,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.3.14
+ rubygems_version: 3.4.2
  signing_key:
  specification_version: 4
- summary: API Client library for the Cloud Speech-to-Text V2 API
+ summary: Converts audio to text by applying powerful neural network models.
  test_files: []